summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
authorKristian Høgsberg Kristensen <[email protected]>2016-02-24 12:50:27 -0800
committerKristian Høgsberg Kristensen <[email protected]>2016-02-24 13:04:54 -0800
commit59f57289959702e528b68bdd0d06488089517a00 (patch)
treea266656ac6129f5ad14b9d0d0c69c7077466c3f3 /src/mesa
parent25c2470b24ce8411f6747eb887137b2511b6d529 (diff)
parentc95d5c5f6fbfe4a96276e67ed279562b33432fb5 (diff)
Merge remote-tracking branch 'origin/master' into vulkan
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/Makefile.sources3
-rw-r--r--src/mesa/drivers/common/meta_copy_image.c2
-rw-r--r--src/mesa/drivers/dri/i915/intel_tex_validate.c1
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp_blit.cpp7
-rw-r--r--src/mesa/drivers/dri/i965/brw_compute.c96
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c23
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_device_info.c19
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c18
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp39
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_builder.h12
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp20
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_meta_fast_clear.c9
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_cache.c23
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp50
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_builder.h17
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp14
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c43
-rw-r--r--src/mesa/drivers/dri/i965/gen6_constant_state.c190
-rw-r--r--src/mesa/drivers/dri/i965/gen6_vs_state.c89
-rw-r--r--src/mesa/drivers/dri/i965/gen7_l3_state.c7
-rw-r--r--src/mesa/drivers/dri/i965/gen7_vs_state.c75
-rw-r--r--src/mesa/drivers/dri/i965/gen8_surface_state.c62
-rw-r--r--src/mesa/drivers/dri/i965/intel_blit.c4
-rw-r--r--src/mesa/drivers/dri/i965/intel_copy_image.c4
-rw-r--r--src/mesa/drivers/dri/i965/intel_mipmap_tree.c217
-rw-r--r--src/mesa/drivers/dri/i965/intel_mipmap_tree.h24
-rw-r--r--src/mesa/drivers/dri/i965/intel_pixel_bitmap.c2
-rw-r--r--src/mesa/drivers/dri/i965/intel_pixel_read.c2
-rw-r--r--src/mesa/drivers/dri/i965/intel_tex_image.c2
-rw-r--r--src/mesa/drivers/dri/i965/intel_tex_subimage.c2
-rw-r--r--src/mesa/drivers/dri/i965/intel_tex_validate.c1
-rw-r--r--src/mesa/main/attrib.c4
-rw-r--r--src/mesa/main/bufferobj.c4
-rw-r--r--src/mesa/main/buffers.c65
-rw-r--r--src/mesa/main/buffers.h14
-rw-r--r--src/mesa/main/clear.c87
-rw-r--r--src/mesa/main/compute.c3
-rw-r--r--src/mesa/main/config.h11
-rw-r--r--src/mesa/main/context.c8
-rw-r--r--src/mesa/main/dd.h9
-rw-r--r--src/mesa/main/dlist.c385
-rw-r--r--src/mesa/main/dlist.h38
-rw-r--r--src/mesa/main/enable.c4
-rw-r--r--src/mesa/main/extensions_table.h4
-rw-r--r--src/mesa/main/fbobject.c5
-rw-r--r--src/mesa/main/format_utils.c74
-rw-r--r--src/mesa/main/formatquery.c5
-rw-r--r--src/mesa/main/genmipmap.c2
-rw-r--r--src/mesa/main/get.c2
-rw-r--r--src/mesa/main/get_hash_params.py14
-rw-r--r--src/mesa/main/glformats.c8
-rw-r--r--src/mesa/main/image.c11
-rw-r--r--src/mesa/main/imports.c8
-rw-r--r--src/mesa/main/imports.h3
-rw-r--r--src/mesa/main/mipmap.c21
-rw-r--r--src/mesa/main/mtypes.h17
-rw-r--r--src/mesa/main/pipelineobj.c5
-rw-r--r--src/mesa/main/program_resource.c4
-rw-r--r--src/mesa/main/readpix.c2
-rw-r--r--src/mesa/main/samplerobj.c6
-rw-r--r--src/mesa/main/shaderapi.c1
-rw-r--r--src/mesa/main/shared.c15
-rw-r--r--src/mesa/main/tests/dispatch_sanity.cpp10
-rw-r--r--src/mesa/main/texgetimage.c14
-rw-r--r--src/mesa/main/teximage.c50
-rw-r--r--src/mesa/main/teximage.h60
-rw-r--r--src/mesa/main/texobj.c33
-rw-r--r--src/mesa/main/texobj.h19
-rw-r--r--src/mesa/main/texparam.c34
-rw-r--r--src/mesa/main/texstate.c2
-rw-r--r--src/mesa/main/texstore.c3
-rw-r--r--src/mesa/main/textureview.c9
-rw-r--r--src/mesa/program/prog_statevars.c4
-rw-r--r--src/mesa/state_tracker/st_atom.c60
-rw-r--r--src/mesa/state_tracker/st_atom.h17
-rw-r--r--src/mesa/state_tracker/st_atom_atomicbuf.c26
-rw-r--r--src/mesa/state_tracker/st_atom_constbuf.c46
-rw-r--r--src/mesa/state_tracker/st_atom_image.c236
-rw-r--r--src/mesa/state_tracker/st_atom_sampler.c8
-rw-r--r--src/mesa/state_tracker/st_atom_shader.c32
-rw-r--r--src/mesa/state_tracker/st_atom_storagebuf.c21
-rw-r--r--src/mesa/state_tracker/st_atom_texture.c62
-rw-r--r--src/mesa/state_tracker/st_cb_bitmap.c266
-rw-r--r--src/mesa/state_tracker/st_cb_clear.c162
-rw-r--r--src/mesa/state_tracker/st_cb_compute.c85
-rw-r--r--src/mesa/state_tracker/st_cb_compute.h38
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels.c305
-rw-r--r--src/mesa/state_tracker/st_cb_drawtex.c62
-rw-r--r--src/mesa/state_tracker/st_cb_fbo.c13
-rw-r--r--src/mesa/state_tracker/st_cb_msaa.c2
-rw-r--r--src/mesa/state_tracker/st_cb_program.c30
-rw-r--r--src/mesa/state_tracker/st_cb_rasterpos.c2
-rw-r--r--src/mesa/state_tracker/st_cb_readpixels.c6
-rw-r--r--src/mesa/state_tracker/st_cb_texture.c142
-rw-r--r--src/mesa/state_tracker/st_context.c56
-rw-r--r--src/mesa/state_tracker/st_context.h35
-rw-r--r--src/mesa/state_tracker/st_draw.c96
-rw-r--r--src/mesa/state_tracker/st_draw.h7
-rw-r--r--src/mesa/state_tracker/st_draw_feedback.c2
-rw-r--r--src/mesa/state_tracker/st_extensions.c69
-rw-r--r--src/mesa/state_tracker/st_format.c42
-rw-r--r--src/mesa/state_tracker/st_format.h8
-rw-r--r--src/mesa/state_tracker/st_glsl_to_tgsi.cpp338
-rw-r--r--src/mesa/state_tracker/st_manager.c1
-rw-r--r--src/mesa/state_tracker/st_program.c105
-rw-r--r--src/mesa/state_tracker/st_program.h41
114 files changed, 3338 insertions, 1252 deletions
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 6669f295399..a6c12c64828 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -405,6 +405,7 @@ STATETRACKER_FILES = \
state_tracker/st_atom_depth.c \
state_tracker/st_atom_framebuffer.c \
state_tracker/st_atom.h \
+ state_tracker/st_atom_image.c \
state_tracker/st_atom_msaa.c \
state_tracker/st_atom_pixeltransfer.c \
state_tracker/st_atom_rasterizer.c \
@@ -427,6 +428,8 @@ STATETRACKER_FILES = \
state_tracker/st_cb_bufferobjects.h \
state_tracker/st_cb_clear.c \
state_tracker/st_cb_clear.h \
+ state_tracker/st_cb_compute.c \
+ state_tracker/st_cb_compute.h \
state_tracker/st_cb_condrender.c \
state_tracker/st_cb_condrender.h \
state_tracker/st_cb_copyimage.c \
diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c
index 6534d432e61..57c3f686b0c 100644
--- a/src/mesa/drivers/common/meta_copy_image.c
+++ b/src/mesa/drivers/common/meta_copy_image.c
@@ -203,7 +203,7 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
src_tex_image = wrap_renderbuffer(ctx, src_renderbuffer);
}
if (!make_view(ctx, src_tex_image, &src_view_tex_image, &src_view_texture,
- dst_tex_image->InternalFormat))
+ dst_internal_format))
goto cleanup;
}
diff --git a/src/mesa/drivers/dri/i915/intel_tex_validate.c b/src/mesa/drivers/dri/i915/intel_tex_validate.c
index 224a67fc6eb..c2111e5e209 100644
--- a/src/mesa/drivers/dri/i915/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i915/intel_tex_validate.c
@@ -1,6 +1,7 @@
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/samplerobj.h"
+#include "main/teximage.h"
#include "main/texobj.h"
#include "intel_context.h"
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 45f7a91295d..8f92fd7cfd2 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -175,6 +175,7 @@ i965_FILES = \
gen6_blorp.cpp \
gen6_cc.c \
gen6_clip_state.c \
+ gen6_constant_state.c \
gen6_depth_state.c \
gen6_depthstencil.c \
gen6_gs_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index c7cb3944870..05fff91ed57 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -70,8 +70,13 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
* the destination buffer because we use the standard render path to render
* to destination color buffers, and the standard render path is
* fast-color-aware.
+ * Lossless compression is only introduced for gen9 onwards whereas
+ * blorp is not supported even for gen8. Therefore it should be impossible
+ * to end up here with single sampled compressed surfaces.
*/
- intel_miptree_resolve_color(brw, src_mt);
+ assert(!intel_miptree_is_lossless_compressed(brw, src_mt));
+ assert(!intel_miptree_is_lossless_compressed(brw, dst_mt));
+ intel_miptree_resolve_color(brw, src_mt, 0);
intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_layer);
intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_layer);
diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index d9f181a1cbf..0d2aca5b78a 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -35,6 +35,84 @@
static void
+prepare_indirect_gpgpu_walker(struct brw_context *brw)
+{
+ GLintptr indirect_offset = brw->compute.num_work_groups_offset;
+ drm_intel_bo *bo = brw->compute.num_work_groups_bo;
+
+ brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ indirect_offset + 0);
+ brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ indirect_offset + 4);
+ brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ indirect_offset + 8);
+
+ if (brw->gen > 7)
+ return;
+
+ /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */
+ BEGIN_BATCH(7);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (7 - 2));
+ OUT_BATCH(MI_PREDICATE_SRC0 + 4);
+ OUT_BATCH(0u);
+ OUT_BATCH(MI_PREDICATE_SRC1 + 0);
+ OUT_BATCH(0u);
+ OUT_BATCH(MI_PREDICATE_SRC1 + 4);
+ OUT_BATCH(0u);
+ ADVANCE_BATCH();
+
+ /* Load compute_dispatch_indirect_x_size into SRC0 */
+ brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ indirect_offset + 0);
+
+ /* predicate = (compute_dispatch_indirect_x_size == 0); */
+ BEGIN_BATCH(1);
+ OUT_BATCH(GEN7_MI_PREDICATE |
+ MI_PREDICATE_LOADOP_LOAD |
+ MI_PREDICATE_COMBINEOP_SET |
+ MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
+ ADVANCE_BATCH();
+
+ /* Load compute_dispatch_indirect_y_size into SRC0 */
+ brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ indirect_offset + 4);
+
+ /* predicate |= (compute_dispatch_indirect_y_size == 0); */
+ BEGIN_BATCH(1);
+ OUT_BATCH(GEN7_MI_PREDICATE |
+ MI_PREDICATE_LOADOP_LOAD |
+ MI_PREDICATE_COMBINEOP_OR |
+ MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
+ ADVANCE_BATCH();
+
+ /* Load compute_dispatch_indirect_z_size into SRC0 */
+ brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ indirect_offset + 8);
+
+ /* predicate |= (compute_dispatch_indirect_z_size == 0); */
+ BEGIN_BATCH(1);
+ OUT_BATCH(GEN7_MI_PREDICATE |
+ MI_PREDICATE_LOADOP_LOAD |
+ MI_PREDICATE_COMBINEOP_OR |
+ MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
+ ADVANCE_BATCH();
+
+ /* predicate = !predicate; */
+ BEGIN_BATCH(1);
+ OUT_BATCH(GEN7_MI_PREDICATE |
+ MI_PREDICATE_LOADOP_LOADINV |
+ MI_PREDICATE_COMBINEOP_OR |
+ MI_PREDICATE_COMPAREOP_FALSE);
+ ADVANCE_BATCH();
+}
+
+static void
brw_emit_gpgpu_walker(struct brw_context *brw)
{
const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
@@ -45,20 +123,10 @@ brw_emit_gpgpu_walker(struct brw_context *brw)
if (brw->compute.num_work_groups_bo == NULL) {
indirect_flag = 0;
} else {
- GLintptr indirect_offset = brw->compute.num_work_groups_offset;
- drm_intel_bo *bo = brw->compute.num_work_groups_bo;
-
- indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE;
-
- brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo,
- I915_GEM_DOMAIN_VERTEX, 0,
- indirect_offset + 0);
- brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo,
- I915_GEM_DOMAIN_VERTEX, 0,
- indirect_offset + 4);
- brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo,
- I915_GEM_DOMAIN_VERTEX, 0,
- indirect_offset + 8);
+ indirect_flag =
+ GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE |
+ (brw->gen == 7 ? GEN7_GPGPU_PREDICATE_ENABLE : 0);
+ prepare_indirect_gpgpu_walker(brw);
}
const unsigned simd_size = prog_data->simd_size;
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 44d2fe4d9e4..31b6b2a3641 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -208,7 +208,11 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
if (!tex_obj || !tex_obj->mt)
continue;
intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
- intel_miptree_resolve_color(brw, tex_obj->mt);
+ /* Sampling engine understands lossless compression and resolving
+ * those surfaces should be skipped for performance reasons.
+ */
+ intel_miptree_resolve_color(brw, tex_obj->mt,
+ INTEL_MIPTREE_IGNORE_CCS_E);
brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
}
@@ -223,7 +227,13 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
tex_obj = intel_texture_object(u->TexObj);
if (tex_obj && tex_obj->mt) {
- intel_miptree_resolve_color(brw, tex_obj->mt);
+ /* Access to images is implemented using indirect messages
+ * against data port. Normal render target write understands
+ * lossless compression but unfortunately the typed/untyped
+ * read/write interface doesn't. Therefore the compressed
+ * surfaces need to be resolved prior to accessing them.
+ */
+ intel_miptree_resolve_color(brw, tex_obj->mt, 0);
brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
}
}
@@ -252,7 +262,11 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
_mesa_get_srgb_format_linear(mt->format) == mt->format)
continue;
- intel_miptree_resolve_color(brw, mt);
+ /* Lossless compression is not supported for SRGB formats, it
+ * should be impossible to get here with such surfaces.
+ */
+ assert(!intel_miptree_is_lossless_compressed(brw, mt));
+ intel_miptree_resolve_color(brw, mt, 0);
brw_render_cache_set_check_flush(brw, mt->bo);
}
}
@@ -710,6 +724,7 @@ brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_thread
ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
+ ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
}
/**
@@ -1227,7 +1242,7 @@ intel_resolve_for_dri2_flush(struct brw_context *brw,
if (rb == NULL || rb->mt == NULL)
continue;
if (rb->mt->num_samples <= 1)
- intel_miptree_resolve_color(brw, rb->mt);
+ intel_miptree_resolve_color(brw, rb->mt, 0);
else
intel_renderbuffer_downsample(brw, rb);
}
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 8416a1d4708..8ef5afea149 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2729,6 +2729,7 @@ enum brw_wm_barycentric_interp_mode {
# define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8)
# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
# define GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE (1 << 6)
+# define GEN9_PS_RENDER_TARGET_RESOLVE_FULL (3 << 6)
# define HSW_PS_UAV_ACCESS_ENABLE (1 << 5)
# define GEN7_PS_POSOFFSET_NONE (0 << 3)
# define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
@@ -2939,6 +2940,7 @@ enum brw_wm_barycentric_interp_mode {
#define GPGPU_WALKER 0x7105
/* GEN7 DW0 */
# define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE (1 << 10)
+# define GEN7_GPGPU_PREDICATE_ENABLE (1 << 8)
/* GEN8+ DW2 */
# define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0
# define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0)
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c
index 05872255865..38a27da898c 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -384,20 +384,19 @@ static const struct brw_device_info brw_device_info_bxt = {
.gt = 1,
.has_llc = false,
- /* XXX: These are preliminary thread counts and URB sizes. */
.num_slices = 1,
- .max_vs_threads = 56,
- .max_hs_threads = 56,
- .max_ds_threads = 56,
- .max_gs_threads = 56,
+ .max_vs_threads = 112,
+ .max_hs_threads = 112,
+ .max_ds_threads = 112,
+ .max_gs_threads = 112,
.max_wm_threads = 64 * 3,
- .max_cs_threads = 28,
+ .max_cs_threads = 6 * 6,
.urb = {
- .size = 64,
+ .size = 192,
.min_vs_entries = 34,
- .max_vs_entries = 640,
- .max_hs_entries = 80,
- .max_ds_entries = 80,
+ .max_vs_entries = 704,
+ .max_hs_entries = 256,
+ .max_ds_entries = 416,
.max_gs_entries = 256,
}
};
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 23e71fd9ba6..afa8a4e9eae 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -391,6 +391,15 @@ brw_try_draw_prims(struct gl_context *ctx,
if (ctx->NewState)
_mesa_update_state(ctx);
+ /* We have to validate the textures *before* checking for fallbacks;
+ * otherwise, the software fallback won't be able to rely on the
+ * texture state, the firstLevel and lastLevel fields won't be
+ * set in the intel texture object (they'll both be 0), and the
+ * software fallback will segfault if it attempts to access any
+ * texture level other than level 0.
+ */
+ brw_validate_textures(brw);
+
/* Find the highest sampler unit used by each shader program. A bit-count
* won't work since ARB programs use the texture unit number as the sampler
* index.
@@ -406,15 +415,6 @@ brw_try_draw_prims(struct gl_context *ctx,
brw->vs.base.sampler_count =
_mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);
- /* We have to validate the textures *before* checking for fallbacks;
- * otherwise, the software fallback won't be able to rely on the
- * texture state, the firstLevel and lastLevel fields won't be
- * set in the intel texture object (they'll both be 0), and the
- * software fallback will segfault if it attempts to access any
- * texture level other than level 0.
- */
- brw_validate_textures(brw);
-
intel_prepare_render(brw);
/* This workaround has to happen outside of brw_upload_render_state()
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index bb22cfa5fab..81a83400ea0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3524,6 +3524,36 @@ fs_visitor::lower_integer_multiplication()
return progress;
}
+bool
+fs_visitor::lower_minmax()
+{
+ assert(devinfo->gen < 6);
+
+ bool progress = false;
+
+ foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+ const fs_builder ibld(this, block, inst);
+
+ if (inst->opcode == BRW_OPCODE_SEL &&
+ inst->predicate == BRW_PREDICATE_NONE) {
+ /* FIXME: Using CMP doesn't preserve the NaN propagation semantics of
+ * the original SEL.L/GE instruction
+ */
+ ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1],
+ inst->conditional_mod);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->conditional_mod = BRW_CONDITIONAL_NONE;
+
+ progress = true;
+ }
+ }
+
+ if (progress)
+ invalidate_live_intervals();
+
+ return progress;
+}
+
static void
setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
fs_reg *dst, fs_reg color, unsigned components)
@@ -5134,7 +5164,7 @@ fs_visitor::optimize()
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
char filename[64];
- snprintf(filename, 64, "%s%d-%s-00-start",
+ snprintf(filename, 64, "%s%d-%s-00-00-start",
stage_abbrev, dispatch_width, nir->info.name);
backend_shader::dump_instructions(filename);
@@ -5187,6 +5217,13 @@ fs_visitor::optimize()
OPT(opt_combine_constants);
OPT(lower_integer_multiplication);
+ if (devinfo->gen <= 5 && OPT(lower_minmax)) {
+ OPT(opt_cmod_propagation);
+ OPT(opt_cse);
+ OPT(opt_copy_propagate);
+ OPT(dead_code_eliminate);
+ }
+
lower_uniform_pull_constant_loads();
validate();
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 89fbbfc90e4..f1a81c13ef9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -175,6 +175,7 @@ public:
bool lower_load_payload();
bool lower_logical_sends();
bool lower_integer_multiplication();
+ bool lower_minmax();
bool lower_simd_width();
bool opt_combine_constants();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index dd3c383a17d..9a3cc3a50c2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -369,20 +369,14 @@ namespace brw {
*
* Generally useful to get the minimum or maximum of two values.
*/
- void
+ instruction *
emit_minmax(const dst_reg &dst, const src_reg &src0,
const src_reg &src1, brw_conditional_mod mod) const
{
assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
- if (shader->devinfo->gen >= 6) {
- set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
- fix_unsigned_negate(src1)));
- } else {
- CMP(null_reg_d(), src0, src1, mod);
- set_predicate(BRW_PREDICATE_NORMAL,
- SEL(dst, src0, src1));
- }
+ return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
+ fix_unsigned_negate(src1)));
}
/**
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index fd25307c587..9dbe13df514 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -87,7 +87,7 @@ public:
void setup_initial_values();
void run();
- void dump_block_data() const;
+ void dump_block_data() const UNUSED;
void *mem_ctx;
cfg_t *cfg;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 956cf53fd6c..8611b8dc443 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1015,28 +1015,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
case nir_op_fmin:
case nir_op_imin:
case nir_op_umin:
- if (devinfo->gen >= 6) {
- inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
- inst->conditional_mod = BRW_CONDITIONAL_L;
- } else {
- bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_L);
- inst = bld.SEL(result, op[0], op[1]);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
+ inst = bld.emit_minmax(result, op[0], op[1], BRW_CONDITIONAL_L);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fmax:
case nir_op_imax:
case nir_op_umax:
- if (devinfo->gen >= 6) {
- inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
- inst->conditional_mod = BRW_CONDITIONAL_GE;
- } else {
- bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_GE);
- inst = bld.SEL(result, op[0], op[1]);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
+ inst = bld.emit_minmax(result, op[0], op[1], BRW_CONDITIONAL_GE);
inst->saturate = instr->dest.saturate;
break;
@@ -1318,7 +1304,7 @@ fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
static brw_reg_type
get_image_base_type(const glsl_type *type)
{
- switch ((glsl_base_type)type->sampler_type) {
+ switch ((glsl_base_type)type->sampled_type) {
case GLSL_TYPE_UINT:
return BRW_REGISTER_TYPE_UD;
case GLSL_TYPE_INT:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
index 9b0750026c4..4adffdd75fb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
@@ -1143,7 +1143,7 @@ namespace brw {
if (rsize && pred)
set_predicate(pred, bld.SEL(tmp, tmp, brw_imm_d(0)));
- return tmp;
+ return retype(tmp, src0.type);
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index b2b07e7e58e..488fa6c0c45 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -228,7 +228,9 @@ get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
unsigned int x_align, y_align;
unsigned int x_scaledown, y_scaledown;
- if (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE) {
+ /* Only single sampled surfaces need to (and actually can) be resolved. */
+ if (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE ||
+ intel_miptree_is_lossless_compressed(brw, irb->mt)) {
/* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
* Target(s)", beneath the "Fast Color Clear" bullet (p327):
*
@@ -872,7 +874,10 @@ brw_meta_resolve_color(struct brw_context *brw,
* bits to let us select the type of resolve. For fast clear resolves, it
* turns out we can use the same value as pre-SKL though.
*/
- set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE);
+ if (intel_miptree_is_lossless_compressed(brw, mt))
+ set_fast_clear_op(brw, GEN9_PS_RENDER_TARGET_RESOLVE_FULL);
+ else
+ set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE);
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
get_resolve_rect(brw, mt, &rect);
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index f44ccd6e071..6b85eac77d6 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -76,6 +76,7 @@ extern const struct brw_tracked_state brw_tcs_samplers;
extern const struct brw_tracked_state brw_tes_samplers;
extern const struct brw_tracked_state brw_gs_samplers;
extern const struct brw_tracked_state brw_cs_samplers;
+extern const struct brw_tracked_state brw_cs_texture_surfaces;
extern const struct brw_tracked_state brw_vs_ubo_surfaces;
extern const struct brw_tracked_state brw_vs_abo_surfaces;
extern const struct brw_tracked_state brw_vs_image_surfaces;
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index cac06fa8918..c6aa1344270 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -387,8 +387,27 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
/* We need to make sure that the programs get regenerated, since
* any offsets leftover in brw_context will no longer be valid.
*/
- brw->NewGLState |= ~0;
- brw->ctx.NewDriverState |= ~0ull;
+ brw->NewGLState = ~0;
+ brw->ctx.NewDriverState = ~0ull;
+ brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0;
+ brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull;
+ brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0;
+ brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull;
+
+ /* Also, NULL out any stale program pointers. */
+ brw->vs.prog_data = NULL;
+ brw->vs.base.prog_data = NULL;
+ brw->tcs.prog_data = NULL;
+ brw->tcs.base.prog_data = NULL;
+ brw->tes.prog_data = NULL;
+ brw->tes.base.prog_data = NULL;
+ brw->gs.prog_data = NULL;
+ brw->gs.base.prog_data = NULL;
+ brw->wm.prog_data = NULL;
+ brw->wm.base.prog_data = NULL;
+ brw->cs.prog_data = NULL;
+ brw->cs.base.prog_data = NULL;
+
intel_batchbuffer_flush(brw);
}
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index ee75ca88549..a91d07446ce 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -280,7 +280,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] =
&brw_cs_pull_constants,
&brw_cs_ubo_surfaces,
&brw_cs_abo_surfaces,
- &brw_texture_surfaces,
+ &brw_cs_texture_surfaces,
&brw_cs_work_groups_surface,
&brw_cs_samplers,
&brw_cs_state,
@@ -395,7 +395,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] =
&brw_cs_pull_constants,
&brw_cs_ubo_surfaces,
&brw_cs_abo_surfaces,
- &brw_texture_surfaces,
+ &brw_cs_texture_surfaces,
&brw_cs_work_groups_surface,
&brw_cs_samplers,
&brw_cs_state,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 2e79de6a883..8136339332b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -26,6 +26,7 @@
#include "brw_cfg.h"
#include "brw_vs.h"
#include "brw_nir.h"
+#include "brw_vec4_builder.h"
#include "brw_vec4_live_variables.h"
#include "brw_dead_control_flow.h"
#include "program/prog_parameter.h"
@@ -1555,11 +1556,6 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
}
}
- if (vs_prog_data->uses_drawid) {
- attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes;
- nr_attributes++;
- }
-
/* VertexID is stored by the VF as the last vertex element, but we
* don't represent it with a flag in inputs_read, so we call it
* VERT_ATTRIB_MAX.
@@ -1570,6 +1566,11 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
nr_attributes++;
}
+ if (vs_prog_data->uses_drawid) {
+ attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes;
+ nr_attributes++;
+ }
+
lower_attributes_to_hw_regs(attribute_map, false /* interleaved */);
return payload_reg + vs_prog_data->nr_attributes;
@@ -1624,6 +1625,36 @@ vec4_vs_visitor::setup_payload(void)
this->first_non_payload_grf = reg;
}
+bool
+vec4_visitor::lower_minmax()
+{
+ assert(devinfo->gen < 6);
+
+ bool progress = false;
+
+ foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
+ const vec4_builder ibld(this, block, inst);
+
+ if (inst->opcode == BRW_OPCODE_SEL &&
+ inst->predicate == BRW_PREDICATE_NONE) {
+ /* FIXME: Using CMP doesn't preserve the NaN propagation semantics of
+ * the original SEL.L/GE instruction
+ */
+ ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1],
+ inst->conditional_mod);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->conditional_mod = BRW_CONDITIONAL_NONE;
+
+ progress = true;
+ }
+ }
+
+ if (progress)
+ invalidate_live_intervals();
+
+ return progress;
+}
+
src_reg
vec4_visitor::get_timestamp()
{
@@ -1861,7 +1892,7 @@ vec4_visitor::run()
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
char filename[64];
- snprintf(filename, 64, "%s-%s-00-start",
+ snprintf(filename, 64, "%s-%s-00-00-start",
stage_abbrev, nir->info.name);
backend_shader::dump_instructions(filename);
@@ -1896,6 +1927,13 @@ vec4_visitor::run()
OPT(dead_code_eliminate);
}
+ if (devinfo->gen <= 5 && OPT(lower_minmax)) {
+ OPT(opt_cmod_propagation);
+ OPT(opt_cse);
+ OPT(opt_copy_propagation);
+ OPT(dead_code_eliminate);
+ }
+
if (failed)
return false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 6d388b04fdb..7b86e1bc050 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -304,6 +304,8 @@ public:
void resolve_ud_negate(src_reg *reg);
+ bool lower_minmax();
+
src_reg get_timestamp();
void dump_instruction(backend_instruction *inst);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
index be1427c7db7..3a8617e05ac 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
@@ -66,7 +66,8 @@ namespace brw {
* instruction passed as argument.
*/
vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) :
- shader(shader), block(block), cursor(inst)
+ shader(shader), block(block), cursor(inst),
+ force_writemask_all(inst->force_writemask_all)
{
annotation.str = inst->annotation;
annotation.ir = inst->ir;
@@ -298,18 +299,14 @@ namespace brw {
*
* Generally useful to get the minimum or maximum of two values.
*/
- void
+ instruction *
emit_minmax(const dst_reg &dst, const src_reg &src0,
const src_reg &src1, brw_conditional_mod mod) const
{
- if (shader->devinfo->gen >= 6) {
- set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
- fix_unsigned_negate(src1)));
- } else {
- CMP(null_reg_d(), src0, src1, mod);
- set_predicate(BRW_PREDICATE_NORMAL,
- SEL(dst, src0, src1));
- }
+ assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
+
+ return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
+ fix_unsigned_negate(src1)));
}
/**
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 3bb768f82dc..8418a3cdc01 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -679,18 +679,8 @@ vec4_instruction *
vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
src_reg src0, src_reg src1)
{
- vec4_instruction *inst;
-
- if (devinfo->gen >= 6) {
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->conditional_mod = conditionalmod;
- } else {
- emit(CMP(dst, src0, src1, conditionalmod));
-
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
-
+ vec4_instruction *inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
+ inst->conditional_mod = conditionalmod;
return inst;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 5ab2f7f09df..f1b8fc4c84d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -872,16 +872,12 @@ brw_update_texture_surfaces(struct brw_context *brw)
/* BRW_NEW_FRAGMENT_PROGRAM */
struct gl_program *fs = (struct gl_program *) brw->fragment_program;
- /* BRW_NEW_COMPUTE_PROGRAM */
- struct gl_program *cs = (struct gl_program *) brw->compute_program;
-
/* _NEW_TEXTURE */
update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false);
update_stage_texture_surfaces(brw, tes, &brw->tes.base, false);
update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
- update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
/* emit alternate set of surface state for gather. this
* allows the surface format to be overriden for only the
@@ -897,8 +893,6 @@ brw_update_texture_surfaces(struct brw_context *brw)
update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
if (fs && fs->UsesGather)
update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
- if (cs && cs->UsesGather)
- update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
}
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
@@ -908,7 +902,6 @@ const struct brw_tracked_state brw_texture_surfaces = {
.dirty = {
.mesa = _NEW_TEXTURE,
.brw = BRW_NEW_BATCH |
- BRW_NEW_COMPUTE_PROGRAM |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_FS_PROG_DATA |
BRW_NEW_GEOMETRY_PROGRAM |
@@ -923,6 +916,37 @@ const struct brw_tracked_state brw_texture_surfaces = {
.emit = brw_update_texture_surfaces,
};
+static void
+brw_update_cs_texture_surfaces(struct brw_context *brw)
+{
+ /* BRW_NEW_COMPUTE_PROGRAM */
+ struct gl_program *cs = (struct gl_program *) brw->compute_program;
+
+ /* _NEW_TEXTURE */
+ update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
+
+ /* emit alternate set of surface state for gather. this
+ * allows the surface format to be overriden for only the
+ * gather4 messages.
+ */
+ if (brw->gen < 8) {
+ if (cs && cs->UsesGather)
+ update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
+ }
+
+ brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+}
+
+const struct brw_tracked_state brw_cs_texture_surfaces = {
+ .dirty = {
+ .mesa = _NEW_TEXTURE,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_COMPUTE_PROGRAM,
+ },
+ .emit = brw_update_cs_texture_surfaces,
+};
+
+
void
brw_upload_ubo_surfaces(struct brw_context *brw,
struct gl_shader *shader,
@@ -1340,6 +1364,11 @@ brw_upload_image_surfaces(struct brw_context *brw,
}
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+ /* This may have changed the image metadata dependent on the context
+ * image unit state and passed to the program as uniforms, make sure
+ * that push and pull constants are reuploaded.
+ */
+ brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
}
}
diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c
new file mode 100644
index 00000000000..6c0c32b26f7
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+#include "program/prog_parameter.h"
+
+void
+gen7_upload_constant_state(struct brw_context *brw,
+ const struct brw_stage_state *stage_state,
+ bool active, unsigned opcode)
+{
+ uint32_t mocs = brw->gen < 8 ? GEN7_MOCS_L3 : 0;
+
+ /* Disable if the shader stage is inactive or there are no push constants. */
+ active = active && stage_state->push_const_size != 0;
+
+ int dwords = brw->gen >= 8 ? 11 : 7;
+ BEGIN_BATCH(dwords);
+ OUT_BATCH(opcode << 16 | (dwords - 2));
+
+ /* Workaround for SKL+ (we use option #2 until we have a need for more
+ * constant buffers). This comes from the documentation for 3DSTATE_CONSTANT_*
+ *
+ * The driver must ensure The following case does not occur without a flush
+ * to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length equal to
+ * zero committed followed by a 3DSTATE_CONSTANT_* with buffer 0 read length
+ * not equal to zero committed. Possible ways to avoid this condition
+ * include:
+ * 1. always force buffer 3 to have a non zero read length
+ * 2. always force buffer 0 to a zero read length
+ */
+ if (brw->gen >= 9 && active) {
+ OUT_BATCH(0);
+ OUT_BATCH(stage_state->push_const_size);
+ } else {
+ OUT_BATCH(active ? stage_state->push_const_size : 0);
+ OUT_BATCH(0);
+ }
+ /* Pointer to the constant buffer. Covered by the set of state flags
+ * from gen6_prepare_wm_contants
+ */
+ if (brw->gen >= 9 && active) {
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ /* XXX: When using buffers other than 0, you need to specify the
+ * graphics virtual address regardless of INSPM/debug bits
+ */
+ OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER, 0,
+ stage_state->push_const_offset);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ } else if (brw->gen >= 8) {
+ OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ } else {
+ OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ }
+
+ ADVANCE_BATCH();
+
+ /* On SKL+ the new constants don't take effect until the next corresponding
+ * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure
+ * that is sent
+ */
+ if (brw->gen >= 9)
+ brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+}
+
+/**
+ * Creates a streamed BO containing the push constants for the VS or GS on
+ * gen6+.
+ *
+ * Push constants are constant values (such as GLSL uniforms) that are
+ * pre-loaded into a shader stage's register space at thread spawn time.
+ *
+ * Not all GLSL uniforms will be uploaded as push constants: The hardware has
+ * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be
+ * uploaded as push constants, while GL 4.4 requires at least 1024 components
+ * to be usable for the VS. Plus, currently we always use pull constants
+ * instead of push constants when doing variable-index array access.
+ *
+ * See brw_curbe.c for the equivalent gen4/5 code.
+ */
+void
+gen6_upload_push_constants(struct brw_context *brw,
+ const struct gl_program *prog,
+ const struct brw_stage_prog_data *prog_data,
+ struct brw_stage_state *stage_state,
+ enum aub_state_struct_type type)
+{
+ struct gl_context *ctx = &brw->ctx;
+
+ if (prog_data->nr_params == 0) {
+ stage_state->push_const_size = 0;
+ } else {
+ /* Updates the ParamaterValues[i] pointers for all parameters of the
+ * basic type of PROGRAM_STATE_VAR.
+ */
+ /* XXX: Should this happen somewhere before to get our state flag set? */
+ if (prog)
+ _mesa_load_state_parameters(ctx, prog->Parameters);
+
+ gl_constant_value *param;
+ int i;
+
+ param = brw_state_batch(brw, type,
+ prog_data->nr_params * sizeof(gl_constant_value),
+ 32, &stage_state->push_const_offset);
+
+ STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
+
+ /* _NEW_PROGRAM_CONSTANTS
+ *
+ * Also _NEW_TRANSFORM -- we may reference clip planes other than as a
+ * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS
+ * wouldn't be set for them.
+ */
+ for (i = 0; i < prog_data->nr_params; i++) {
+ param[i] = *prog_data->param[i];
+ }
+
+ if (0) {
+ fprintf(stderr, "%s constants:\n",
+ _mesa_shader_stage_to_string(stage_state->stage));
+ for (i = 0; i < prog_data->nr_params; i++) {
+ if ((i & 7) == 0)
+ fprintf(stderr, "g%d: ",
+ prog_data->dispatch_grf_start_reg + i / 8);
+ fprintf(stderr, "%8f ", param[i].f);
+ if ((i & 7) == 7)
+ fprintf(stderr, "\n");
+ }
+ if ((i & 7) != 0)
+ fprintf(stderr, "\n");
+ fprintf(stderr, "\n");
+ }
+
+ stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
+ /* We can only push 32 registers of constants at a time. */
+
+ /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS:
+ *
+ * "The sum of all four read length fields (each incremented to
+ * represent the actual read length) must be less than or equal to
+ * 32"
+ *
+ * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS:
+ *
+ * "The sum of all four read length fields must be less than or
+ * equal to the size of 64"
+ *
+ * The other shader stages all match the VS's limits.
+ */
+ assert(stage_state->push_const_size <= 32);
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index 4bc0a8598d6..c18610ca38c 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -33,95 +33,6 @@
#include "program/prog_statevars.h"
#include "intel_batchbuffer.h"
-/**
- * Creates a streamed BO containing the push constants for the VS or GS on
- * gen6+.
- *
- * Push constants are constant values (such as GLSL uniforms) that are
- * pre-loaded into a shader stage's register space at thread spawn time.
- *
- * Not all GLSL uniforms will be uploaded as push constants: The hardware has
- * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be
- * uploaded as push constants, while GL 4.4 requires at least 1024 components
- * to be usable for the VS. Plus, currently we always use pull constants
- * instead of push constants when doing variable-index array access.
- *
- * See brw_curbe.c for the equivalent gen4/5 code.
- */
-void
-gen6_upload_push_constants(struct brw_context *brw,
- const struct gl_program *prog,
- const struct brw_stage_prog_data *prog_data,
- struct brw_stage_state *stage_state,
- enum aub_state_struct_type type)
-{
- struct gl_context *ctx = &brw->ctx;
-
- if (prog_data->nr_params == 0) {
- stage_state->push_const_size = 0;
- } else {
- /* Updates the ParamaterValues[i] pointers for all parameters of the
- * basic type of PROGRAM_STATE_VAR.
- */
- /* XXX: Should this happen somewhere before to get our state flag set? */
- if (prog)
- _mesa_load_state_parameters(ctx, prog->Parameters);
-
- gl_constant_value *param;
- unsigned i;
-
- param = brw_state_batch(brw, type,
- prog_data->nr_params * sizeof(gl_constant_value),
- 32, &stage_state->push_const_offset);
-
- STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
-
- /* _NEW_PROGRAM_CONSTANTS
- *
- * Also _NEW_TRANSFORM -- we may reference clip planes other than as a
- * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS
- * wouldn't be set for them.
- */
- for (i = 0; i < prog_data->nr_params; i++) {
- param[i] = *prog_data->param[i];
- }
-
- if (0) {
- fprintf(stderr, "%s constants:\n",
- _mesa_shader_stage_to_string(stage_state->stage));
- for (i = 0; i < prog_data->nr_params; i++) {
- if ((i & 7) == 0)
- fprintf(stderr, "g%d: ",
- prog_data->dispatch_grf_start_reg + i / 8);
- fprintf(stderr, "%8f ", param[i].f);
- if ((i & 7) == 7)
- fprintf(stderr, "\n");
- }
- if ((i & 7) != 0)
- fprintf(stderr, "\n");
- fprintf(stderr, "\n");
- }
-
- stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
- /* We can only push 32 registers of constants at a time. */
-
- /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS:
- *
- * "The sum of all four read length fields (each incremented to
- * represent the actual read length) must be less than or equal to
- * 32"
- *
- * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS:
- *
- * "The sum of all four read length fields must be less than or
- * equal to the size of 64"
- *
- * The other shader stages all match the VS's limits.
- */
- assert(stage_state->push_const_size <= 32);
- }
-}
-
static void
gen6_upload_vs_push_constants(struct brw_context *brw)
{
diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c
index 0c1813f9048..c4babc24f43 100644
--- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
@@ -298,7 +298,12 @@ static struct brw_l3_weights
get_pipeline_state_l3_weights(const struct brw_context *brw)
{
const struct brw_stage_state *stage_states[] = {
- &brw->vs.base, &brw->gs.base, &brw->wm.base, &brw->cs.base
+ [MESA_SHADER_VERTEX] = &brw->vs.base,
+ [MESA_SHADER_TESS_CTRL] = &brw->tcs.base,
+ [MESA_SHADER_TESS_EVAL] = &brw->tes.base,
+ [MESA_SHADER_GEOMETRY] = &brw->gs.base,
+ [MESA_SHADER_FRAGMENT] = &brw->wm.base,
+ [MESA_SHADER_COMPUTE] = &brw->cs.base
};
bool needs_dc = false, needs_slm = false;
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index a18dc697651..0daecdc9ed2 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -29,81 +29,6 @@
#include "program/prog_statevars.h"
#include "intel_batchbuffer.h"
-
-void
-gen7_upload_constant_state(struct brw_context *brw,
- const struct brw_stage_state *stage_state,
- bool active, unsigned opcode)
-{
- uint32_t mocs = brw->gen < 8 ? GEN7_MOCS_L3 : 0;
-
- /* Disable if the shader stage is inactive or there are no push constants. */
- active = active && stage_state->push_const_size != 0;
-
- int dwords = brw->gen >= 8 ? 11 : 7;
- BEGIN_BATCH(dwords);
- OUT_BATCH(opcode << 16 | (dwords - 2));
-
- /* Workaround for SKL+ (we use option #2 until we have a need for more
- * constant buffers). This comes from the documentation for 3DSTATE_CONSTANT_*
- *
- * The driver must ensure The following case does not occur without a flush
- * to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length equal to
- * zero committed followed by a 3DSTATE_CONSTANT_* with buffer 0 read length
- * not equal to zero committed. Possible ways to avoid this condition
- * include:
- * 1. always force buffer 3 to have a non zero read length
- * 2. always force buffer 0 to a zero read length
- */
- if (brw->gen >= 9 && active) {
- OUT_BATCH(0);
- OUT_BATCH(stage_state->push_const_size);
- } else {
- OUT_BATCH(active ? stage_state->push_const_size : 0);
- OUT_BATCH(0);
- }
-
- /* Pointer to the constant buffer. Covered by the set of state flags
- * from gen6_prepare_wm_contants
- */
- if (brw->gen >= 9 && active) {
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- /* XXX: When using buffers other than 0, you need to specify the
- * graphics virtual address regardless of INSPM/debug bits
- */
- OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER, 0,
- stage_state->push_const_offset);
- OUT_BATCH(0);
- OUT_BATCH(0);
- } else if (brw->gen>= 8) {
- OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- } else {
- OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- }
-
- ADVANCE_BATCH();
-
- /* On SKL+ the new constants don't take effect until the next corresponding
- * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure
- * that is sent
- */
- if (brw->gen >= 9)
- brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
-}
-
static void
upload_vs_state(struct brw_context *brw)
{
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index fc8f701a24c..0a52815b8a6 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -197,6 +197,28 @@ gen8_emit_fast_clear_color(struct brw_context *brw,
surf[7] |= mt->fast_clear_color_value;
}
+static uint32_t
+gen8_get_aux_mode(const struct brw_context *brw,
+ const struct intel_mipmap_tree *mt,
+ uint32_t surf_type)
+{
+ if (mt->mcs_mt == NULL)
+ return GEN8_SURFACE_AUX_MODE_NONE;
+
+ /*
+ * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
+ * "When MCS is enabled for non-MSRT, HALIGN_16 must be used"
+ *
+ * From the hardware spec for GEN9:
+ * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
+ * 16 must be used."
+ */
+ if (brw->gen >= 9 || mt->num_samples == 1)
+ assert(mt->halign == 16);
+
+ return GEN8_SURFACE_AUX_MODE_MCS;
+}
+
static void
gen8_emit_texture_surface_state(struct brw_context *brw,
struct intel_mipmap_tree *mt,
@@ -209,13 +231,13 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
bool rw, bool for_gather)
{
const unsigned depth = max_layer - min_layer;
- struct intel_mipmap_tree *aux_mt = NULL;
- uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE;
+ struct intel_mipmap_tree *aux_mt = mt->mcs_mt;
uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
unsigned tiling_mode, pitch;
const unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode);
const uint32_t surf_type = translate_tex_target(target);
+ uint32_t aux_mode = gen8_get_aux_mode(brw, mt, surf_type);
if (mt->format == MESA_FORMAT_S_UINT8) {
tiling_mode = GEN8_SURFACE_TILING_W;
@@ -229,20 +251,9 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
* buffer should always have been resolved before it is used as a texture
* so there is no need for it.
*/
- if (mt->mcs_mt && mt->num_samples > 1) {
- aux_mt = mt->mcs_mt;
- aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
-
- /*
- * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
- * "When MCS is enabled for non-MSRT, HALIGN_16 must be used"
- *
- * From the hardware spec for GEN9:
- * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
- * 16 must be used."
- */
- if (brw->gen >= 9 || mt->num_samples == 1)
- assert(mt->halign == 16);
+ if (mt->num_samples <= 1) {
+ aux_mt = NULL;
+ aux_mode = GEN8_SURFACE_AUX_MODE_NONE;
}
uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index);
@@ -418,8 +429,6 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
struct gl_context *ctx = &brw->ctx;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_mipmap_tree *mt = irb->mt;
- struct intel_mipmap_tree *aux_mt = NULL;
- uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE;
unsigned width = mt->logical_width0;
unsigned height = mt->logical_height0;
unsigned pitch = mt->pitch;
@@ -472,21 +481,8 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
__func__, _mesa_get_format_name(rb_format));
}
- if (mt->mcs_mt) {
- aux_mt = mt->mcs_mt;
- aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
-
- /*
- * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
- * "When MCS is enabled for non-MSRT, HALIGN_16 must be used"
- *
- * From the hardware spec for GEN9:
- * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
- * 16 must be used."
- */
- if (brw->gen >= 9 || mt->num_samples == 1)
- assert(mt->halign == 16);
- }
+ struct intel_mipmap_tree *aux_mt = mt->mcs_mt;
+ const uint32_t aux_mode = gen8_get_aux_mode(brw, mt, surf_type);
uint32_t *surf = allocate_surface_state(brw, &offset, surf_index);
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 6d29fbdde21..72cf9af5b53 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -317,8 +317,8 @@ intel_miptree_blit(struct brw_context *brw,
*/
intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_slice);
intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_slice);
- intel_miptree_resolve_color(brw, src_mt);
- intel_miptree_resolve_color(brw, dst_mt);
+ intel_miptree_resolve_color(brw, src_mt, 0);
+ intel_miptree_resolve_color(brw, dst_mt, 0);
if (src_flip)
src_y = minify(src_mt->physical_height0, src_level - src_mt->first_level) - src_y - height;
diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c
index dbbac1c95a2..08b7623e63d 100644
--- a/src/mesa/drivers/dri/i965/intel_copy_image.c
+++ b/src/mesa/drivers/dri/i965/intel_copy_image.c
@@ -270,11 +270,11 @@ intel_copy_image_sub_data(struct gl_context *ctx,
*/
intel_miptree_all_slices_resolve_hiz(brw, src_mt);
intel_miptree_all_slices_resolve_depth(brw, src_mt);
- intel_miptree_resolve_color(brw, src_mt);
+ intel_miptree_resolve_color(brw, src_mt, 0);
intel_miptree_all_slices_resolve_hiz(brw, dst_mt);
intel_miptree_all_slices_resolve_depth(brw, dst_mt);
- intel_miptree_resolve_color(brw, dst_mt);
+ intel_miptree_resolve_color(brw, dst_mt, 0);
_mesa_get_format_block_size(src_mt->format, &bw, &bh);
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 108dd87dd8b..6c233d84df9 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -161,8 +161,9 @@ intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt,
}
}
-static bool
-intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
+bool
+intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw,
+ unsigned tiling)
{
/* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
* Target(s)", beneath the "Fast Color Clear" bullet (p326):
@@ -200,9 +201,9 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
* - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
* non-MSRTs only.
*/
-static bool
+bool
intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
- struct intel_mipmap_tree *mt)
+ const struct intel_mipmap_tree *mt)
{
/* MCS support does not exist prior to Gen7 */
if (brw->gen < 7)
@@ -266,6 +267,32 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
return true;
}
+/* On Gen9 support for color buffer compression was extended to single
+ * sampled surfaces. This is a helper considering both auxiliary buffer
+ * type and number of samples telling if the given miptree represents
+ * the new single sampled case - also called lossless compression.
+ */
+bool
+intel_miptree_is_lossless_compressed(const struct brw_context *brw,
+ const struct intel_mipmap_tree *mt)
+{
+ /* Only available from Gen9 onwards. */
+ if (brw->gen < 9)
+ return false;
+
+ /* Compression always requires auxiliary buffer. */
+ if (!mt->mcs_mt)
+ return false;
+
+ /* Single sample compression is represented re-using msaa compression
+ * layout type: "Compressed Multisampled Surfaces".
+ */
+ if (mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS)
+ return false;
+
+ /* And finally distinguish between msaa and single sample case. */
+ return mt->num_samples <= 1;
+}
/**
* Determine depth format corresponding to a depth+stencil format,
@@ -609,22 +636,21 @@ intel_get_yf_ys_bo_size(struct intel_mipmap_tree *mt, unsigned *alignment,
return size;
}
-struct intel_mipmap_tree *
-intel_miptree_create(struct brw_context *brw,
- GLenum target,
- mesa_format format,
- GLuint first_level,
- GLuint last_level,
- GLuint width0,
- GLuint height0,
- GLuint depth0,
- GLuint num_samples,
- uint32_t layout_flags)
+static struct intel_mipmap_tree *
+miptree_create(struct brw_context *brw,
+ GLenum target,
+ mesa_format format,
+ GLuint first_level,
+ GLuint last_level,
+ GLuint width0,
+ GLuint height0,
+ GLuint depth0,
+ GLuint num_samples,
+ uint32_t layout_flags)
{
struct intel_mipmap_tree *mt;
mesa_format tex_format = format;
mesa_format etc_format = MESA_FORMAT_NONE;
- GLuint total_width, total_height;
uint32_t alloc_flags = 0;
format = intel_lower_compressed_format(brw, format);
@@ -645,21 +671,8 @@ intel_miptree_create(struct brw_context *brw,
return NULL;
}
- total_width = mt->total_width;
- total_height = mt->total_height;
-
- if (format == MESA_FORMAT_S_UINT8) {
- /* Align to size of W tile, 64x64. */
- total_width = ALIGN(total_width, 64);
- total_height = ALIGN(total_height, 64);
- }
-
- bool y_or_x = false;
-
- if (mt->tiling == (I915_TILING_Y | I915_TILING_X)) {
- y_or_x = true;
+ if (mt->tiling == (I915_TILING_Y | I915_TILING_X))
mt->tiling = I915_TILING_Y;
- }
if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD)
alloc_flags |= BO_ALLOC_FOR_RENDER;
@@ -675,26 +688,61 @@ intel_miptree_create(struct brw_context *brw,
mt->bo = drm_intel_bo_alloc_for_render(brw->bufmgr, "miptree",
size, alignment);
} else {
- mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
- total_width, total_height, mt->cpp,
- &mt->tiling, &pitch,
- alloc_flags);
+ if (format == MESA_FORMAT_S_UINT8) {
+ /* Align to size of W tile, 64x64. */
+ mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
+ ALIGN(mt->total_width, 64),
+ ALIGN(mt->total_height, 64),
+ mt->cpp, &mt->tiling, &pitch,
+ alloc_flags);
+ } else {
+ mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
+ mt->total_width, mt->total_height,
+ mt->cpp, &mt->tiling, &pitch,
+ alloc_flags);
+ }
}
mt->pitch = pitch;
+ return mt;
+}
+
+struct intel_mipmap_tree *
+intel_miptree_create(struct brw_context *brw,
+ GLenum target,
+ mesa_format format,
+ GLuint first_level,
+ GLuint last_level,
+ GLuint width0,
+ GLuint height0,
+ GLuint depth0,
+ GLuint num_samples,
+ uint32_t layout_flags)
+{
+ struct intel_mipmap_tree *mt = miptree_create(
+ brw, target, format,
+ first_level, last_level,
+ width0, height0, depth0, num_samples,
+ layout_flags);
+
/* If the BO is too large to fit in the aperture, we need to use the
* BLT engine to support it. Prior to Sandybridge, the BLT paths can't
* handle Y-tiling, so we need to fall back to X.
*/
- if (brw->gen < 6 && y_or_x && mt->bo->size >= brw->max_gtt_map_object_size) {
+ if (brw->gen < 6 && mt->bo->size >= brw->max_gtt_map_object_size &&
+ mt->tiling == I915_TILING_Y) {
+ unsigned long pitch = mt->pitch;
+ const uint32_t alloc_flags =
+ (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD) ?
+ BO_ALLOC_FOR_RENDER : 0;
perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
mt->total_width, mt->total_height);
mt->tiling = I915_TILING_X;
drm_intel_bo_unreference(mt->bo);
mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
- total_width, total_height, mt->cpp,
+ mt->total_width, mt->total_height, mt->cpp,
&mt->tiling, &pitch, alloc_flags);
mt->pitch = pitch;
}
@@ -1402,6 +1450,27 @@ intel_miptree_copy_teximage(struct brw_context *brw,
intel_obj->needs_validate = true;
}
+static void
+intel_miptree_init_mcs(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ int init_value)
+{
+ /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
+ *
+ * When MCS buffer is enabled and bound to MSRT, it is required that it
+ * is cleared prior to any rendering.
+ *
+ * Since we don't use the MCS buffer for any purpose other than rendering,
+ * it makes sense to just clear it immediately upon allocation.
+ *
+ * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
+ */
+ void *data = intel_miptree_map_raw(brw, mt->mcs_mt);
+ memset(data, init_value, mt->mcs_mt->total_height * mt->mcs_mt->pitch);
+ intel_miptree_unmap_raw(mt->mcs_mt);
+ mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
+}
+
static bool
intel_miptree_alloc_mcs(struct brw_context *brw,
struct intel_mipmap_tree *mt,
@@ -1447,31 +1516,18 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
*/
const uint32_t mcs_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
MIPTREE_LAYOUT_TILING_Y;
- mt->mcs_mt = intel_miptree_create(brw,
- mt->target,
- format,
- mt->first_level,
- mt->last_level,
- mt->logical_width0,
- mt->logical_height0,
- mt->logical_depth0,
- 0 /* num_samples */,
- mcs_flags);
-
- /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
- *
- * When MCS buffer is enabled and bound to MSRT, it is required that it
- * is cleared prior to any rendering.
- *
- * Since we don't use the MCS buffer for any purpose other than rendering,
- * it makes sense to just clear it immediately upon allocation.
- *
- * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
- */
- void *data = intel_miptree_map_raw(brw, mt->mcs_mt);
- memset(data, 0xff, mt->mcs_mt->total_height * mt->mcs_mt->pitch);
- intel_miptree_unmap_raw(mt->mcs_mt);
- mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
+ mt->mcs_mt = miptree_create(brw,
+ mt->target,
+ format,
+ mt->first_level,
+ mt->last_level,
+ mt->logical_width0,
+ mt->logical_height0,
+ mt->logical_depth0,
+ 0 /* num_samples */,
+ mcs_flags);
+
+ intel_miptree_init_mcs(brw, mt, 0xFF);
return mt->mcs_mt;
}
@@ -1520,16 +1576,16 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
if (brw->gen >= 8) {
layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
}
- mt->mcs_mt = intel_miptree_create(brw,
- mt->target,
- format,
- mt->first_level,
- mt->last_level,
- mcs_width,
- mcs_height,
- mt->logical_depth0,
- 0 /* num_samples */,
- layout_flags);
+ mt->mcs_mt = miptree_create(brw,
+ mt->target,
+ format,
+ mt->first_level,
+ mt->last_level,
+ mcs_width,
+ mcs_height,
+ mt->logical_depth0,
+ 0 /* num_samples */,
+ layout_flags);
return mt->mcs_mt;
}
@@ -1991,8 +2047,17 @@ intel_miptree_all_slices_resolve_depth(struct brw_context *brw,
void
intel_miptree_resolve_color(struct brw_context *brw,
- struct intel_mipmap_tree *mt)
+ struct intel_mipmap_tree *mt,
+ int flags)
{
+ /* From gen9 onwards there is new compression scheme for single sampled
+ * surfaces called "lossless compressed". These don't need to be always
+ * resolved.
+ */
+ if ((flags & INTEL_MIPTREE_IGNORE_CCS_E) &&
+ intel_miptree_is_lossless_compressed(brw, mt))
+ return;
+
switch (mt->fast_clear_state) {
case INTEL_FAST_CLEAR_STATE_NO_MCS:
case INTEL_FAST_CLEAR_STATE_RESOLVED:
@@ -2001,8 +2066,10 @@ intel_miptree_resolve_color(struct brw_context *brw,
case INTEL_FAST_CLEAR_STATE_UNRESOLVED:
case INTEL_FAST_CLEAR_STATE_CLEAR:
/* Fast color clear resolves only make sense for non-MSAA buffers. */
- if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE)
+ if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE ||
+ intel_miptree_is_lossless_compressed(brw, mt)) {
brw_meta_resolve_color(brw, mt);
+ }
break;
}
}
@@ -2029,7 +2096,7 @@ intel_miptree_make_shareable(struct brw_context *brw,
assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE);
if (mt->mcs_mt) {
- intel_miptree_resolve_color(brw, mt);
+ intel_miptree_resolve_color(brw, mt, 0);
intel_miptree_release(&mt->mcs_mt);
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
}
@@ -2137,7 +2204,7 @@ intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
/* CPU accesses to color buffers don't understand fast color clears, so
* resolve any pending fast color clears before we map.
*/
- intel_miptree_resolve_color(brw, mt);
+ intel_miptree_resolve_color(brw, mt, 0);
drm_intel_bo *bo = mt->bo;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 64f73ea9ae5..7cdfb37ff41 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -667,6 +667,18 @@ intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt,
unsigned *width_px, unsigned *height);
bool
+intel_miptree_is_lossless_compressed(const struct brw_context *brw,
+ const struct intel_mipmap_tree *mt);
+
+bool
+intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw,
+ unsigned tiling);
+
+bool
+intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
+ const struct intel_mipmap_tree *mt);
+
+bool
intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
struct intel_mipmap_tree *mt);
@@ -884,9 +896,19 @@ intel_miptree_used_for_rendering(struct intel_mipmap_tree *mt)
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_UNRESOLVED;
}
+/**
+ * Flag values telling color resolve pass which special types of buffers
+ * can be ignored.
+ *
+ * INTEL_MIPTREE_IGNORE_CCS_E: Lossless compressed (single-sample
+ * compression scheme since gen9)
+ */
+#define INTEL_MIPTREE_IGNORE_CCS_E (1 << 0)
+
void
intel_miptree_resolve_color(struct brw_context *brw,
- struct intel_mipmap_tree *mt);
+ struct intel_mipmap_tree *mt,
+ int flags);
void
intel_miptree_make_shareable(struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
index 699e48a297c..54a741395eb 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
@@ -257,7 +257,7 @@ do_blit_bitmap( struct gl_context *ctx,
/* The blitter has no idea about fast color clears, so we need to resolve
* the miptree before we do anything.
*/
- intel_miptree_resolve_color(brw, irb->mt);
+ intel_miptree_resolve_color(brw, irb->mt, 0);
/* Chop it all into chunks that can be digested by hardware: */
for (py = 0; py < height; py += DY) {
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index 10d14623fe1..31030b1b4ea 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -155,7 +155,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
/* Since we are going to read raw data to the miptree, we need to resolve
* any pending fast color clears before we start.
*/
- intel_miptree_resolve_color(brw, irb->mt);
+ intel_miptree_resolve_color(brw, irb->mt, 0);
bo = irb->mt->bo;
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 5d32a4ce650..e21c3ac543f 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -423,7 +423,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
/* Since we are going to write raw data to the miptree, we need to resolve
* any pending fast color clears before we start.
*/
- intel_miptree_resolve_color(brw, image->mt);
+ intel_miptree_resolve_color(brw, image->mt, 0);
bo = image->mt->bo;
diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
index 970ded1e66b..573f701acdd 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -140,7 +140,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
/* Since we are going to write raw data to the miptree, we need to resolve
* any pending fast color clears before we start.
*/
- intel_miptree_resolve_color(brw, image->mt);
+ intel_miptree_resolve_color(brw, image->mt, 0);
bo = image->mt->bo;
diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c
index 2cf9c13be33..3a811e10359 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c
@@ -24,6 +24,7 @@
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/samplerobj.h"
+#include "main/teximage.h"
#include "main/texobj.h"
#include "brw_context.h"
diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
index 08f13178f84..61f703667aa 100644
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -766,7 +766,7 @@ pop_texture_group(struct gl_context *ctx, struct texture_state *texstate)
_mesa_set_enable(ctx, GL_TEXTURE_2D, !!(unit->Enabled & TEXTURE_2D_BIT));
_mesa_set_enable(ctx, GL_TEXTURE_3D, !!(unit->Enabled & TEXTURE_3D_BIT));
if (ctx->Extensions.ARB_texture_cube_map) {
- _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP_ARB,
+ _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP,
!!(unit->Enabled & TEXTURE_CUBE_BIT));
}
if (ctx->Extensions.NV_texture_rectangle) {
@@ -837,7 +837,7 @@ pop_texture_group(struct gl_context *ctx, struct texture_state *texstate)
/* don't restore state for unsupported targets to prevent
* raising GL errors.
*/
- if (obj->Target == GL_TEXTURE_CUBE_MAP_ARB &&
+ if (obj->Target == GL_TEXTURE_CUBE_MAP &&
!ctx->Extensions.ARB_texture_cube_map) {
continue;
}
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index de1aba44c1b..9aec42508a7 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -3007,8 +3007,8 @@ set_atomic_buffer_binding(struct gl_context *ctx,
_mesa_reference_buffer_object(ctx, &binding->BufferObject, bufObj);
if (bufObj == ctx->Shared->NullBufferObj) {
- binding->Offset = -1;
- binding->Size = -1;
+ binding->Offset = 0;
+ binding->Size = 0;
} else {
binding->Offset = offset;
binding->Size = size;
diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index 83e238ae825..26dafd1b786 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -58,10 +58,7 @@ supported_buffer_bitmask(const struct gl_context *ctx,
if (_mesa_is_user_fbo(fb)) {
/* A user-created renderbuffer */
- GLuint i;
- for (i = 0; i < ctx->Const.MaxColorAttachments; i++) {
- mask |= (BUFFER_BIT_COLOR0 << i);
- }
+ mask = ((1 << ctx->Const.MaxColorAttachments) - 1) << BUFFER_COLOR0;
}
else {
/* A window system framebuffer */
@@ -159,6 +156,9 @@ draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer)
case GL_COLOR_ATTACHMENT7_EXT:
return BUFFER_BIT_COLOR7;
default:
+ /* not an error, but also not supported */
+ if (buffer >= GL_COLOR_ATTACHMENT8 && buffer <= GL_COLOR_ATTACHMENT31)
+ return 1 << BUFFER_COUNT;
/* error */
return BAD_MASK;
}
@@ -171,7 +171,7 @@ draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer)
* renderbuffer (a BUFFER_* value).
* return -1 for an invalid buffer.
*/
-static GLint
+static gl_buffer_index
read_buffer_enum_to_index(GLenum buffer)
{
switch (buffer) {
@@ -214,6 +214,9 @@ read_buffer_enum_to_index(GLenum buffer)
case GL_COLOR_ATTACHMENT7_EXT:
return BUFFER_COLOR7;
default:
+ /* not an error, but also not supported */
+ if (buffer >= GL_COLOR_ATTACHMENT8 && buffer <= GL_COLOR_ATTACHMENT31)
+ return BUFFER_COUNT;
/* error */
return -1;
}
@@ -221,7 +224,7 @@ read_buffer_enum_to_index(GLenum buffer)
/**
- * Called by glDrawBuffer().
+ * Called by glDrawBuffer() and glNamedFramebufferDrawBuffer().
* Specify which renderbuffer(s) to draw into for the first color output.
* <buffer> can name zero, one, two or four renderbuffers!
* \sa _mesa_DrawBuffers
@@ -242,9 +245,9 @@ read_buffer_enum_to_index(GLenum buffer)
*
* See the GL_EXT_framebuffer_object spec for more info.
*/
-void
-_mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLenum buffer, const char *caller)
+static void
+draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
+ GLenum buffer, const char *caller)
{
GLbitfield destMask;
@@ -293,7 +296,7 @@ void GLAPIENTRY
_mesa_DrawBuffer(GLenum buffer)
{
GET_CURRENT_CONTEXT(ctx);
- _mesa_draw_buffer(ctx, ctx->DrawBuffer, buffer, "glDrawBuffer");
+ draw_buffer(ctx, ctx->DrawBuffer, buffer, "glDrawBuffer");
}
@@ -312,22 +315,22 @@ _mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf)
else
fb = ctx->WinSysDrawBuffer;
- _mesa_draw_buffer(ctx, fb, buf, "glNamedFramebufferDrawBuffer");
+ draw_buffer(ctx, fb, buf, "glNamedFramebufferDrawBuffer");
}
/**
- * Called by glDrawBuffersARB; specifies the destination color renderbuffers
- * for N fragment program color outputs.
+ * Called by glDrawBuffersARB() and glNamedFramebufferDrawBuffers() to specify
+ * the destination color renderbuffers for N fragment program color outputs.
* \sa _mesa_DrawBuffer
* \param n number of outputs
* \param buffers array [n] of renderbuffer names. Unlike glDrawBuffer, the
* names cannot specify more than one buffer. For example,
* GL_FRONT_AND_BACK is illegal.
*/
-void
-_mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLsizei n, const GLenum *buffers, const char *caller)
+static void
+draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb,
+ GLsizei n, const GLenum *buffers, const char *caller)
{
GLuint output;
GLbitfield usedBufferMask, supportedMask;
@@ -502,7 +505,7 @@ void GLAPIENTRY
_mesa_DrawBuffers(GLsizei n, const GLenum *buffers)
{
GET_CURRENT_CONTEXT(ctx);
- _mesa_draw_buffers(ctx, ctx->DrawBuffer, n, buffers, "glDrawBuffers");
+ draw_buffers(ctx, ctx->DrawBuffer, n, buffers, "glDrawBuffers");
}
@@ -522,7 +525,7 @@ _mesa_NamedFramebufferDrawBuffers(GLuint framebuffer, GLsizei n,
else
fb = ctx->WinSysDrawBuffer;
- _mesa_draw_buffers(ctx, fb, n, bufs, "glNamedFramebufferDrawBuffers");
+ draw_buffers(ctx, fb, n, bufs, "glNamedFramebufferDrawBuffers");
}
@@ -545,8 +548,8 @@ updated_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb)
/**
- * Helper function to set the GL_DRAW_BUFFER state in the context and
- * current FBO. Called via glDrawBuffer(), glDrawBuffersARB()
+ * Helper function to set the GL_DRAW_BUFFER state for the given context and
+ * FBO. Called via glDrawBuffer(), glDrawBuffersARB()
*
* All error checking will have been done prior to calling this function
* so nothing should go wrong at this point.
@@ -662,14 +665,17 @@ _mesa_update_draw_buffers(struct gl_context *ctx)
/**
* Like \sa _mesa_drawbuffers(), this is a helper function for setting
- * GL_READ_BUFFER state in the context and current FBO.
+ * GL_READ_BUFFER state for the given context and FBO.
+ * Note that all error checking should have been done before calling
+ * this function.
* \param ctx the rendering context
+ * \param fb the framebuffer object to update
* \param buffer GL_FRONT, GL_BACK, GL_COLOR_ATTACHMENT0, etc.
* \param bufferIndex the numerical index corresponding to 'buffer'
*/
void
_mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLenum buffer, GLint bufferIndex)
+ GLenum buffer, gl_buffer_index bufferIndex)
{
if ((fb == ctx->ReadBuffer) && _mesa_is_winsys_fbo(fb)) {
/* Only update the per-context READ_BUFFER state if we're bound to
@@ -687,15 +693,16 @@ _mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb,
/**
- * Called by glReadBuffer to set the source renderbuffer for reading pixels.
+ * Called by glReadBuffer and glNamedFramebufferReadBuffer to set the source
+ * renderbuffer for reading pixels.
* \param mode color buffer such as GL_FRONT, GL_BACK, etc.
*/
-void
-_mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLenum buffer, const char *caller)
+static void
+read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
+ GLenum buffer, const char *caller)
{
GLbitfield supportedMask;
- GLint srcBuffer;
+ gl_buffer_index srcBuffer;
FLUSH_VERTICES(ctx, 0);
@@ -740,7 +747,7 @@ void GLAPIENTRY
_mesa_ReadBuffer(GLenum buffer)
{
GET_CURRENT_CONTEXT(ctx);
- _mesa_read_buffer(ctx, ctx->ReadBuffer, buffer, "glReadBuffer");
+ read_buffer(ctx, ctx->ReadBuffer, buffer, "glReadBuffer");
}
@@ -759,5 +766,5 @@ _mesa_NamedFramebufferReadBuffer(GLuint framebuffer, GLenum src)
else
fb = ctx->WinSysReadBuffer;
- _mesa_read_buffer(ctx, fb, src, "glNamedFramebufferReadBuffer");
+ read_buffer(ctx, fb, src, "glNamedFramebufferReadBuffer");
}
diff --git a/src/mesa/main/buffers.h b/src/mesa/main/buffers.h
index 5aa79fda54b..9df08154688 100644
--- a/src/mesa/main/buffers.h
+++ b/src/mesa/main/buffers.h
@@ -34,13 +34,11 @@
#include "glheader.h"
+#include "mtypes.h"
struct gl_context;
struct gl_framebuffer;
-extern void
-_mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLenum buffer, const char *caller);
extern void GLAPIENTRY
_mesa_DrawBuffer( GLenum mode );
@@ -48,10 +46,6 @@ _mesa_DrawBuffer( GLenum mode );
extern void GLAPIENTRY
_mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf);
-extern void
-_mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLsizei n, const GLenum *buffers, const char *caller);
-
extern void GLAPIENTRY
_mesa_DrawBuffers(GLsizei n, const GLenum *buffers);
@@ -66,16 +60,12 @@ _mesa_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb,
extern void
_mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLenum buffer, GLint bufferIndex);
+ GLenum buffer, gl_buffer_index bufferIndex);
extern void
_mesa_update_draw_buffers(struct gl_context *ctx);
-extern void
-_mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
- GLenum buffer, const char *caller);
-
extern void GLAPIENTRY
_mesa_ReadBuffer( GLenum mode );
diff --git a/src/mesa/main/clear.c b/src/mesa/main/clear.c
index 3bfcc5c0e39..92f69ab9b59 100644
--- a/src/mesa/main/clear.c
+++ b/src/mesa/main/clear.c
@@ -325,18 +325,6 @@ _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value)
_mesa_update_state( ctx );
}
- /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers'
- * of the OpenGL 4.5 spec states:
- *
- * "An INVALID_ENUM error is generated by ClearBufferiv and
- * ClearNamedFramebufferiv if buffer is not COLOR or STENCIL."
- */
- if (buffer == GL_DEPTH || buffer == GL_DEPTH_STENCIL) {
- _mesa_error(ctx, GL_INVALID_ENUM,
- "glClearBufferiv(buffer=GL_DEPTH || GL_DEPTH_STENCIL)");
- return;
- }
-
switch (buffer) {
case GL_STENCIL:
/* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says:
@@ -386,26 +374,13 @@ _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value)
}
}
break;
- case GL_DEPTH:
- /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says:
- *
- * "The result of ClearBuffer is undefined if no conversion between
- * the type of the specified value and the type of the buffer being
- * cleared is defined (for example, if ClearBufferiv is called for a
- * fixed- or floating-point buffer, or if ClearBufferfv is called
- * for a signed or unsigned integer buffer). This is not an error."
+ default:
+ /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers'
+ * of the OpenGL 4.5 spec states:
*
- * In this case we take "undefined" and "not an error" to mean "ignore."
- * Note that we still need to generate an error for the invalid
- * drawbuffer case (see the GL_STENCIL case above).
+ * "An INVALID_ENUM error is generated by ClearBufferiv and
+ * ClearNamedFramebufferiv if buffer is not COLOR or STENCIL."
*/
- if (drawbuffer != 0) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glClearBufferiv(drawbuffer=%d)",
- drawbuffer);
- return;
- }
- return;
- default:
_mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferiv(buffer=%s)",
_mesa_enum_to_string(buffer));
return;
@@ -470,32 +445,13 @@ _mesa_ClearBufferuiv(GLenum buffer, GLint drawbuffer, const GLuint *value)
}
}
break;
- case GL_DEPTH:
- case GL_STENCIL:
- /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says:
- *
- * "The result of ClearBuffer is undefined if no conversion between
- * the type of the specified value and the type of the buffer being
- * cleared is defined (for example, if ClearBufferiv is called for a
- * fixed- or floating-point buffer, or if ClearBufferfv is called
- * for a signed or unsigned integer buffer). This is not an error."
- *
- * In this case we take "undefined" and "not an error" to mean "ignore."
- * Even though we could do something sensible for GL_STENCIL, page 263
- * (page 279 of the PDF) says:
- *
- * "Only ClearBufferiv should be used to clear stencil buffers."
+ default:
+ /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers'
+ * of the OpenGL 4.5 spec states:
*
- * Note that we still need to generate an error for the invalid
- * drawbuffer case (see the GL_STENCIL case in _mesa_ClearBufferiv).
+ * "An INVALID_ENUM error is generated by ClearBufferuiv and
+ * ClearNamedFramebufferuiv if buffer is not COLOR."
*/
- if (drawbuffer != 0) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glClearBufferuiv(drawbuffer=%d)",
- drawbuffer);
- return;
- }
- return;
- default:
_mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferuiv(buffer=%s)",
_mesa_enum_to_string(buffer));
return;
@@ -587,26 +543,13 @@ _mesa_ClearBufferfv(GLenum buffer, GLint drawbuffer, const GLfloat *value)
}
}
break;
- case GL_STENCIL:
- /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says:
- *
- * "The result of ClearBuffer is undefined if no conversion between
- * the type of the specified value and the type of the buffer being
- * cleared is defined (for example, if ClearBufferiv is called for a
- * fixed- or floating-point buffer, or if ClearBufferfv is called
- * for a signed or unsigned integer buffer). This is not an error."
+ default:
+ /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers'
+ * of the OpenGL 4.5 spec states:
*
- * In this case we take "undefined" and "not an error" to mean "ignore."
- * Note that we still need to generate an error for the invalid
- * drawbuffer case (see the GL_DEPTH case above).
+ * "An INVALID_ENUM error is generated by ClearBufferfv and
+ * ClearNamedFramebufferfv if buffer is not COLOR or DEPTH."
*/
- if (drawbuffer != 0) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glClearBufferfv(drawbuffer=%d)",
- drawbuffer);
- return;
- }
- return;
- default:
_mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferfv(buffer=%s)",
_mesa_enum_to_string(buffer));
return;
diff --git a/src/mesa/main/compute.c b/src/mesa/main/compute.c
index 53e7a500f61..b71430f2b12 100644
--- a/src/mesa/main/compute.c
+++ b/src/mesa/main/compute.c
@@ -41,6 +41,9 @@ _mesa_DispatchCompute(GLuint num_groups_x,
if (!_mesa_validate_DispatchCompute(ctx, num_groups))
return;
+ if (num_groups_x == 0u || num_groups_y == 0u || num_groups_z == 0u)
+ return;
+
ctx->Driver.DispatchCompute(ctx, num_groups);
}
diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index 2d53e2fa72f..820ae072da6 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -288,17 +288,6 @@
#define PERFQUERY_HAVE_GPA_EXTENDED_COUNTERS 0
/*@}*/
-/** For GL_ARB_compute_shader */
-/*@{*/
-#define MAX_COMPUTE_UNIFORM_BLOCKS 12
-#define MAX_COMPUTE_TEXTURE_IMAGE_UNITS 16
-#define MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 8
-#define MAX_COMPUTE_ATOMIC_COUNTERS 8
-#define MAX_COMPUTE_SHARED_MEMORY_SIZE 32768
-#define MAX_COMPUTE_UNIFORM_COMPONENTS 512
-#define MAX_COMPUTE_IMAGE_UNIFORMS 8
-/*@}*/
-
/** For GL_ARB_pipeline_statistics_query */
#define MAX_PIPELINE_STATISTICS 11
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 9388a1ca51d..26eee28db4e 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -1600,9 +1600,6 @@ _mesa_check_init_viewport(struct gl_context *ctx, GLuint width, GLuint height)
static void
handle_first_current(struct gl_context *ctx)
{
- GLenum buffer;
- GLint bufferIndex;
-
if (ctx->Version == 0) {
/* probably in the process of tearing down the context */
return;
@@ -1617,6 +1614,8 @@ handle_first_current(struct gl_context *ctx)
* For GLES it is always GL_BACK which has a magic interpretation */
if (!ctx->HasConfig && _mesa_is_desktop_gl(ctx)) {
if (ctx->DrawBuffer != _mesa_get_incomplete_framebuffer()) {
+ GLenum buffer;
+
if (ctx->DrawBuffer->Visual.doubleBufferMode)
buffer = GL_BACK;
else
@@ -1627,6 +1626,9 @@ handle_first_current(struct gl_context *ctx)
}
if (ctx->ReadBuffer != _mesa_get_incomplete_framebuffer()) {
+ gl_buffer_index bufferIndex;
+ GLenum buffer;
+
if (ctx->ReadBuffer->Visual.doubleBufferMode) {
buffer = GL_BACK;
bufferIndex = BUFFER_BACK_LEFT;
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 19ef3042548..3f5aa5db051 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -35,6 +35,7 @@
#include "glheader.h"
+struct gl_bitmap_atlas;
struct gl_buffer_object;
struct gl_context;
struct gl_display_list;
@@ -154,6 +155,14 @@ struct dd_function_table {
GLint x, GLint y, GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *bitmap );
+
+ /**
+ * Called by display list code for optimized glCallLists/glBitmap rendering
+ * The driver must support texture rectangles of width 1024 or more.
+ */
+ void (*DrawAtlasBitmaps)(struct gl_context *ctx,
+ const struct gl_bitmap_atlas *atlas,
+ GLuint count, const GLubyte *ids);
/*@}*/
diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 0e25efbae72..afd2d83cb59 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -72,6 +72,9 @@
#include "vbo/vbo.h"
+#define USE_BITMAP_ATLAS 1
+
+
/**
* Other parts of Mesa (such as the VBO module) can plug into the display
@@ -606,6 +609,261 @@ void mesa_print_display_list(GLuint list);
/**
+ * Does the given display list only contain a single glBitmap call?
+ */
+static bool
+is_bitmap_list(const struct gl_display_list *dlist)
+{
+ const Node *n = dlist->Head;
+ if (n[0].opcode == OPCODE_BITMAP) {
+ n += InstSize[OPCODE_BITMAP];
+ if (n[0].opcode == OPCODE_END_OF_LIST)
+ return true;
+ }
+ return false;
+}
+
+
+/**
+ * Is the given display list an empty list?
+ */
+static bool
+is_empty_list(const struct gl_display_list *dlist)
+{
+ const Node *n = dlist->Head;
+ return n[0].opcode == OPCODE_END_OF_LIST;
+}
+
+
+/**
+ * Delete/free a gl_bitmap_atlas. Called during context tear-down.
+ */
+void
+_mesa_delete_bitmap_atlas(struct gl_context *ctx, struct gl_bitmap_atlas *atlas)
+{
+ if (atlas->texObj) {
+ ctx->Driver.DeleteTexture(ctx, atlas->texObj);
+ }
+ free(atlas->glyphs);
+}
+
+
+/**
+ * Lookup a gl_bitmap_atlas by listBase ID.
+ */
+static struct gl_bitmap_atlas *
+lookup_bitmap_atlas(struct gl_context *ctx, GLuint listBase)
+{
+ struct gl_bitmap_atlas *atlas;
+
+ assert(listBase > 0);
+ atlas = _mesa_HashLookup(ctx->Shared->BitmapAtlas, listBase);
+ return atlas;
+}
+
+
+/**
+ * Create new bitmap atlas and insert into hash table.
+ */
+static struct gl_bitmap_atlas *
+alloc_bitmap_atlas(struct gl_context *ctx, GLuint listBase)
+{
+ struct gl_bitmap_atlas *atlas;
+
+ assert(listBase > 0);
+ assert(_mesa_HashLookup(ctx->Shared->BitmapAtlas, listBase) == NULL);
+
+ atlas = calloc(1, sizeof(*atlas));
+ if (atlas) {
+ _mesa_HashInsert(ctx->Shared->BitmapAtlas, listBase, atlas);
+ }
+
+ return atlas;
+}
+
+
+/**
+ * Try to build a bitmap atlas. This involves examining a sequence of
+ * display lists which contain glBitmap commands and putting the bitmap
+ * images into a texture map (the atlas).
+ * If we succeed, gl_bitmap_atlas::complete will be set to true.
+ * If we fail, gl_bitmap_atlas::incomplete will be set to true.
+ */
+static void
+build_bitmap_atlas(struct gl_context *ctx, struct gl_bitmap_atlas *atlas,
+ GLuint listBase)
+{
+ unsigned i, row_height = 0, xpos = 0, ypos = 0;
+ GLubyte *map;
+ GLint map_stride;
+
+ assert(atlas);
+ assert(!atlas->complete);
+ assert(atlas->numBitmaps > 0);
+
+ /* We use a rectangle texture (non-normalized coords) for the atlas */
+ assert(ctx->Extensions.NV_texture_rectangle);
+ assert(ctx->Const.MaxTextureRectSize >= 1024);
+
+ atlas->texWidth = 1024;
+ atlas->texHeight = 0; /* determined below */
+
+ atlas->glyphs = malloc(atlas->numBitmaps * sizeof(atlas->glyphs[0]));
+ if (!atlas->glyphs) {
+ /* give up */
+ atlas->incomplete = true;
+ return;
+ }
+
+ /* Loop over the display lists. They should all contain a single glBitmap
+ * call. If not, bail out. Also, compute the position and sizes of each
+ * bitmap in the atlas to determine the texture atlas size.
+ */
+ for (i = 0; i < atlas->numBitmaps; i++) {
+ const struct gl_display_list *list = _mesa_lookup_list(ctx, listBase + i);
+ const Node *n;
+ struct gl_bitmap_glyph *g = &atlas->glyphs[i];
+ unsigned bitmap_width, bitmap_height;
+ float bitmap_xmove, bitmap_ymove, bitmap_xorig, bitmap_yorig;
+
+ if (!list || is_empty_list(list)) {
+ /* stop here */
+ atlas->numBitmaps = i;
+ break;
+ }
+
+ if (!is_bitmap_list(list)) {
+ /* This list does not contain exactly one glBitmap command. Give up. */
+ atlas->incomplete = true;
+ return;
+ }
+
+ /* get bitmap info from the display list command */
+ n = list->Head;
+ assert(n[0].opcode == OPCODE_BITMAP);
+ bitmap_width = n[1].i;
+ bitmap_height = n[2].i;
+ bitmap_xorig = n[3].f;
+ bitmap_yorig = n[4].f;
+ bitmap_xmove = n[5].f;
+ bitmap_ymove = n[6].f;
+
+ if (xpos + bitmap_width > atlas->texWidth) {
+ /* advance to the next row of the texture */
+ xpos = 0;
+ ypos += row_height;
+ row_height = 0;
+ }
+
+ /* save the bitmap's position in the atlas */
+ g->x = xpos;
+ g->y = ypos;
+ g->w = bitmap_width;
+ g->h = bitmap_height;
+ g->xorig = bitmap_xorig;
+ g->yorig = bitmap_yorig;
+ g->xmove = bitmap_xmove;
+ g->ymove = bitmap_ymove;
+
+ xpos += bitmap_width;
+
+ /* keep track of tallest bitmap in the row */
+ row_height = MAX2(row_height, bitmap_height);
+ }
+
+ /* Now we know the texture height */
+ atlas->texHeight = ypos + row_height;
+
+ if (atlas->texHeight == 0) {
+ /* no glyphs found, give up */
+ goto fail;
+ }
+ else if (atlas->texHeight > ctx->Const.MaxTextureRectSize) {
+ /* too large, give up */
+ goto fail;
+ }
+
+ /* Create atlas texture (texture ID is irrelevant) */
+ atlas->texObj = ctx->Driver.NewTextureObject(ctx, 999, GL_TEXTURE_RECTANGLE);
+ if (!atlas->texObj) {
+ goto out_of_memory;
+ }
+
+ atlas->texObj->Sampler.MinFilter = GL_NEAREST;
+ atlas->texObj->Sampler.MagFilter = GL_NEAREST;
+ atlas->texObj->MaxLevel = 0;
+ atlas->texObj->Immutable = GL_TRUE;
+
+ atlas->texImage = _mesa_get_tex_image(ctx, atlas->texObj,
+ GL_TEXTURE_RECTANGLE, 0);
+ if (!atlas->texImage) {
+ goto out_of_memory;
+ }
+
+ _mesa_init_teximage_fields(ctx, atlas->texImage,
+ atlas->texWidth, atlas->texHeight, 1, 0,
+ GL_ALPHA, MESA_FORMAT_A_UNORM8);
+
+ /* alloc image storage */
+ if (!ctx->Driver.AllocTextureImageBuffer(ctx, atlas->texImage)) {
+ goto out_of_memory;
+ }
+
+ /* map teximage, load with bitmap glyphs */
+ ctx->Driver.MapTextureImage(ctx, atlas->texImage, 0,
+ 0, 0, atlas->texWidth, atlas->texHeight,
+ GL_MAP_WRITE_BIT, &map, &map_stride);
+ if (!map) {
+ goto out_of_memory;
+ }
+
+ /* Background/clear pixels are 0xff, foreground/set pixels are 0x0 */
+ memset(map, 0xff, map_stride * atlas->texHeight);
+
+ for (i = 0; i < atlas->numBitmaps; i++) {
+ const struct gl_display_list *list = _mesa_lookup_list(ctx, listBase + i);
+ const Node *n = list->Head;
+
+ assert(n[0].opcode == OPCODE_BITMAP ||
+ n[0].opcode == OPCODE_END_OF_LIST);
+
+ if (n[0].opcode == OPCODE_BITMAP) {
+ unsigned bitmap_width = n[1].i;
+ unsigned bitmap_height = n[2].i;
+ unsigned xpos = atlas->glyphs[i].x;
+ unsigned ypos = atlas->glyphs[i].y;
+ const void *bitmap_image = get_pointer(&n[7]);
+
+ assert(atlas->glyphs[i].w == bitmap_width);
+ assert(atlas->glyphs[i].h == bitmap_height);
+
+ /* put the bitmap image into the texture image */
+ _mesa_expand_bitmap(bitmap_width, bitmap_height,
+ &ctx->DefaultPacking, bitmap_image,
+ map + map_stride * ypos + xpos, /* dest addr */
+ map_stride, 0x0);
+ }
+ }
+
+ ctx->Driver.UnmapTextureImage(ctx, atlas->texImage, 0);
+
+ atlas->complete = true;
+
+ return;
+
+out_of_memory:
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "Display list bitmap atlas");
+fail:
+ if (atlas->texObj) {
+ ctx->Driver.DeleteTexture(ctx, atlas->texObj);
+ }
+ free(atlas->glyphs);
+ atlas->glyphs = NULL;
+ atlas->incomplete = true;
+}
+
+
+/**
* Allocate a gl_display_list object with an initial block of storage.
* \param count how many display list nodes/tokens to allocate
*/
@@ -856,6 +1114,30 @@ _mesa_delete_list(struct gl_context *ctx, struct gl_display_list *dlist)
/**
+ * Called by _mesa_HashWalk() to check if a display list which is being
+ * deleted belongs to a bitmap texture atlas.
+ */
+static void
+check_atlas_for_deleted_list(GLuint atlas_id, void *data, void *userData)
+{
+ struct gl_bitmap_atlas *atlas = (struct gl_bitmap_atlas *) data;
+ GLuint list_id = *((GLuint *) userData); /* the list being deleted */
+
+ /* See if the list_id falls in the range contained in this texture atlas */
+ if (atlas->complete &&
+ list_id >= atlas_id &&
+ list_id < atlas_id + atlas->numBitmaps) {
+ /* Mark the atlas as incomplete so it doesn't get used. But don't
+ * delete it yet since we don't want to try to recreate it in the next
+ * glCallLists.
+ */
+ atlas->complete = false;
+ atlas->incomplete = true;
+ }
+}
+
+
+/**
* Destroy a display list and remove from hash table.
* \param list - display list number
*/
@@ -871,6 +1153,16 @@ destroy_list(struct gl_context *ctx, GLuint list)
if (!dlist)
return;
+ if (is_bitmap_list(dlist)) {
+ /* If we're destroying a simple glBitmap display list, there's a
+ * chance that we're destroying a bitmap image that's in a texture
+ * atlas. Examine all atlases to see if that's the case. There's
+ * usually few (if any) atlases so this isn't expensive.
+ */
+ _mesa_HashWalk(ctx->Shared->BitmapAtlas,
+ check_atlas_for_deleted_list, &list);
+ }
+
_mesa_delete_list(ctx, dlist);
_mesa_HashRemove(ctx->Shared->DisplayList, list);
}
@@ -8895,6 +9187,18 @@ _mesa_DeleteLists(GLuint list, GLsizei range)
_mesa_error(ctx, GL_INVALID_VALUE, "glDeleteLists");
return;
}
+
+ if (range > 1) {
+ /* We may be deleting a set of bitmap lists. See if there's a
+ * bitmap atlas to free.
+ */
+ struct gl_bitmap_atlas *atlas = lookup_bitmap_atlas(ctx, list);
+ if (atlas) {
+ _mesa_delete_bitmap_atlas(ctx, atlas);
+ _mesa_HashRemove(ctx->Shared->BitmapAtlas, list);
+ }
+ }
+
for (i = list; i < list + range; i++) {
destroy_list(ctx, i);
}
@@ -8936,6 +9240,24 @@ _mesa_GenLists(GLsizei range)
}
}
+ if (USE_BITMAP_ATLAS &&
+ range > 16 &&
+ ctx->Driver.DrawAtlasBitmaps) {
+ /* "range > 16" is a rough heuristic to guess when glGenLists might be
+ * used to allocate display lists for glXUseXFont or wglUseFontBitmaps.
+ * Create the empty atlas now.
+ */
+ struct gl_bitmap_atlas *atlas = lookup_bitmap_atlas(ctx, base);
+ if (!atlas) {
+ atlas = alloc_bitmap_atlas(ctx, base);
+ }
+ if (atlas) {
+ /* Atlas _should_ be new/empty now, but clobbering is OK */
+ assert(atlas->numBitmaps == 0);
+ atlas->numBitmaps = range;
+ }
+ }
+
mtx_unlock(&ctx->Shared->Mutex);
return base;
@@ -9085,6 +9407,65 @@ _mesa_CallList(GLuint list)
/**
+ * Try to execute a glCallLists() command where the display lists contain
+ * glBitmap commands with a texture atlas.
+ * \return true for success, false otherwise
+ */
+static bool
+render_bitmap_atlas(struct gl_context *ctx, GLsizei n, GLenum type,
+ const void *lists)
+{
+ struct gl_bitmap_atlas *atlas;
+ int i;
+
+ if (!USE_BITMAP_ATLAS ||
+ !ctx->Current.RasterPosValid ||
+ ctx->List.ListBase == 0 ||
+ type != GL_UNSIGNED_BYTE ||
+ !ctx->Driver.DrawAtlasBitmaps) {
+ /* unsupported */
+ return false;
+ }
+
+ atlas = lookup_bitmap_atlas(ctx, ctx->List.ListBase);
+
+ if (!atlas) {
+ /* Even if glGenLists wasn't called, we can still try to create
+ * the atlas now.
+ */
+ atlas = alloc_bitmap_atlas(ctx, ctx->List.ListBase);
+ }
+
+ if (atlas && !atlas->complete && !atlas->incomplete) {
+ /* Try to build the bitmap atlas now.
+ * If the atlas was created in glGenLists, we'll have recorded the
+ * number of lists (bitmaps). Otherwise, take a guess at 256.
+ */
+ if (atlas->numBitmaps == 0)
+ atlas->numBitmaps = 256;
+ build_bitmap_atlas(ctx, atlas, ctx->List.ListBase);
+ }
+
+ if (!atlas || !atlas->complete) {
+ return false;
+ }
+
+ /* check that all display list IDs are in the atlas */
+ for (i = 0; i < n; i++) {
+ const GLubyte *ids = (const GLubyte *) lists;
+
+ if (ids[i] >= atlas->numBitmaps) {
+ return false;
+ }
+ }
+
+ ctx->Driver.DrawAtlasBitmaps(ctx, atlas, n, (const GLubyte *) lists);
+
+ return true;
+}
+
+
+/**
* Execute glCallLists: call multiple display lists.
*/
void GLAPIENTRY
@@ -9123,6 +9504,10 @@ _mesa_CallLists(GLsizei n, GLenum type, const GLvoid * lists)
return;
}
+ if (render_bitmap_atlas(ctx, n, type, lists)) {
+ return;
+ }
+
/* Save the CompileFlag status, turn it off, execute display list,
* and restore the CompileFlag.
*/
diff --git a/src/mesa/main/dlist.h b/src/mesa/main/dlist.h
index 7a23208ba5a..22b696f50c1 100644
--- a/src/mesa/main/dlist.h
+++ b/src/mesa/main/dlist.h
@@ -36,6 +36,44 @@
#include "main/mtypes.h"
+/**
+ * Describes the location and size of a glBitmap image in a texture atlas.
+ */
+struct gl_bitmap_glyph
+{
+ unsigned short x, y, w, h; /**< position and size in the texture */
+ float xorig, yorig; /**< bitmap origin */
+ float xmove, ymove; /**< rasterpos move */
+};
+
+
+/**
+ * Describes a set of glBitmap display lists which live in a texture atlas.
+ * The idea is when we see a code sequence of glListBase(b), glCallLists(n)
+ * we're probably drawing bitmap font glyphs. We try to put all the bitmap
+ * glyphs into one texture map then render the glCallLists as a textured
+ * quadstrip.
+ */
+struct gl_bitmap_atlas
+{
+ bool complete; /**< Is the atlas ready to use? */
+ bool incomplete; /**< Did we fail to construct this atlas? */
+
+ unsigned numBitmaps;
+ unsigned texWidth, texHeight;
+ struct gl_texture_object *texObj;
+ struct gl_texture_image *texImage;
+
+ unsigned glyphHeight;
+
+ struct gl_bitmap_glyph *glyphs;
+};
+
+void
+_mesa_delete_bitmap_atlas(struct gl_context *ctx,
+ struct gl_bitmap_atlas *atlas);
+
+
GLboolean GLAPIENTRY
_mesa_IsList(GLuint list);
diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index 3fd3c2747ea..3985457f21a 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -749,7 +749,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
return;
/* GL_ARB_texture_cube_map */
- case GL_TEXTURE_CUBE_MAP_ARB:
+ case GL_TEXTURE_CUBE_MAP:
if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES)
goto invalid_enum_error;
CHECK_EXTENSION(ARB_texture_cube_map, cap);
@@ -1450,7 +1450,7 @@ _mesa_IsEnabled( GLenum cap )
return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled;
/* GL_ARB_texture_cube_map */
- case GL_TEXTURE_CUBE_MAP_ARB:
+ case GL_TEXTURE_CUBE_MAP:
CHECK_EXTENSION(ARB_texture_cube_map);
return is_texture_enabled(ctx, TEXTURE_CUBE_BIT);
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index d1e3a99fdc0..68f36178f32 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -224,6 +224,7 @@ EXT(EXT_subtexture , dummy_true
EXT(EXT_texture , dummy_true , GLL, x , x , x , 1996)
EXT(EXT_texture3D , dummy_true , GLL, x , x , x , 1996)
EXT(EXT_texture_array , EXT_texture_array , GLL, GLC, x , x , 2006)
+EXT(EXT_texture_border_clamp , ARB_texture_border_clamp , x , x , x , ES2, 2014)
EXT(EXT_texture_compression_dxt1 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2004)
EXT(EXT_texture_compression_latc , EXT_texture_compression_latc , GLL, x , x , x , 2006)
EXT(EXT_texture_compression_rgtc , ARB_texture_compression_rgtc , GLL, GLC, x , x , 2004)
@@ -325,6 +326,7 @@ EXT(OES_point_sprite , ARB_point_sprite
EXT(OES_query_matrix , dummy_true , x , x , ES1, x , 2003)
EXT(OES_read_format , dummy_true , GLL, GLC, ES1, x , 2003)
EXT(OES_rgb8_rgba8 , dummy_true , x , x , ES1, ES2, 2005)
+EXT(OES_shader_image_atomic , ARB_shader_image_load_store , x , x , x , 31, 2015)
EXT(OES_single_precision , dummy_true , x , x , ES1, x , 2003)
EXT(OES_standard_derivatives , OES_standard_derivatives , x , x , x , ES2, 2005)
EXT(OES_stencil1 , dummy_false , x , x , x , x , 2005)
@@ -333,6 +335,7 @@ EXT(OES_stencil8 , dummy_true
EXT(OES_stencil_wrap , dummy_true , x , x , ES1, x , 2002)
EXT(OES_surfaceless_context , dummy_true , x , x , ES1, ES2, 2012)
EXT(OES_texture_3D , dummy_true , x , x , x , ES2, 2005)
+EXT(OES_texture_border_clamp , ARB_texture_border_clamp , x , x , x , ES2, 2014)
EXT(OES_texture_cube_map , ARB_texture_cube_map , x , x , ES1, x , 2007)
EXT(OES_texture_env_crossbar , ARB_texture_env_crossbar , x , x , ES1, x , 2005)
EXT(OES_texture_float , OES_texture_float , x , x , x , ES2, 2005)
@@ -341,6 +344,7 @@ EXT(OES_texture_half_float , OES_texture_half_float
EXT(OES_texture_half_float_linear , OES_texture_half_float_linear , x , x , x , ES2, 2005)
EXT(OES_texture_mirrored_repeat , dummy_true , x , x , ES1, x , 2005)
EXT(OES_texture_npot , ARB_texture_non_power_of_two , x , x , ES1, ES2, 2005)
+EXT(OES_texture_stencil8 , ARB_texture_stencil8 , x , x , x , 30, 2014)
EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample , x , x , ES1, 31, 2014)
EXT(OES_vertex_array_object , dummy_true , x , x , ES1, ES2, 2010)
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 1b9b692f001..1f10050c891 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -811,7 +811,7 @@ test_attachment_completeness(const struct gl_context *ctx, GLenum format,
break;
}
- baseFormat = _mesa_get_format_base_format(texImage->TexFormat);
+ baseFormat = texImage->_BaseFormat;
if (format == GL_COLOR) {
if (!_mesa_is_legal_color_format(ctx, baseFormat)) {
@@ -868,8 +868,7 @@ test_attachment_completeness(const struct gl_context *ctx, GLenum format,
}
}
else if (att->Type == GL_RENDERBUFFER_EXT) {
- const GLenum baseFormat =
- _mesa_get_format_base_format(att->Renderbuffer->Format);
+ const GLenum baseFormat = att->Renderbuffer->_BaseFormat;
assert(att->Renderbuffer);
if (!att->Renderbuffer->InternalFormat ||
diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c
index 5fdabd5b97f..d16d69c3795 100644
--- a/src/mesa/main/format_utils.c
+++ b/src/mesa/main/format_utils.c
@@ -179,6 +179,63 @@ _mesa_compute_rgba2base2rgba_component_mapping(GLenum baseFormat, uint8_t *map)
}
}
+
+/**
+ * Special case conversion function to swap r/b channels from the source
+ * image to the dest image.
+ */
+static void
+convert_ubyte_rgba_to_bgra(size_t width, size_t height,
+ const uint8_t *src, size_t src_stride,
+ uint8_t *dst, size_t dst_stride)
+{
+ int row;
+
+ if (sizeof(void *) == 8 &&
+ src_stride % 8 == 0 &&
+ dst_stride % 8 == 0 &&
+ (GLsizeiptr) src % 8 == 0 &&
+ (GLsizeiptr) dst % 8 == 0) {
+ /* use 64-bit word to swizzle two 32-bit pixels. We need 8-byte
+ * alignment for src/dst addresses and strides.
+ */
+ for (row = 0; row < height; row++) {
+ const GLuint64 *s = (const GLuint64 *) src;
+ GLuint64 *d = (GLuint64 *) dst;
+ int i;
+ for (i = 0; i < width/2; i++) {
+ d[i] = ( (s[i] & 0xff00ff00ff00ff00) |
+ ((s[i] & 0xff000000ff) << 16) |
+ ((s[i] & 0xff000000ff0000) >> 16));
+ }
+ if (width & 1) {
+ /* handle the case of odd widths */
+ const GLuint s = ((const GLuint *) src)[width - 1];
+ GLuint *d = (GLuint *) dst + width - 1;
+ *d = ( (s & 0xff00ff00) |
+ ((s & 0xff) << 16) |
+ ((s & 0xff0000) >> 16));
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+ } else {
+ for (row = 0; row < height; row++) {
+ const GLuint *s = (const GLuint *) src;
+ GLuint *d = (GLuint *) dst;
+ int i;
+ for (i = 0; i < width; i++) {
+ d[i] = ( (s[i] & 0xff00ff00) |
+ ((s[i] & 0xff) << 16) |
+ ((s[i] & 0xff0000) >> 16));
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+ }
+}
+
+
/**
* This can be used to convert between most color formats.
*
@@ -299,11 +356,18 @@ _mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride,
return;
} else if (src_array_format == RGBA8_UBYTE) {
assert(!_mesa_is_format_integer_color(dst_format));
- for (row = 0; row < height; ++row) {
- _mesa_pack_ubyte_rgba_row(dst_format, width,
- (const uint8_t (*)[4])src, dst);
- src += src_stride;
- dst += dst_stride;
+
+ if (dst_format == MESA_FORMAT_B8G8R8A8_UNORM) {
+ convert_ubyte_rgba_to_bgra(width, height, src, src_stride,
+ dst, dst_stride);
+ }
+ else {
+ for (row = 0; row < height; ++row) {
+ _mesa_pack_ubyte_rgba_row(dst_format, width,
+ (const uint8_t (*)[4])src, dst);
+ src += src_stride;
+ dst += dst_stride;
+ }
}
return;
} else if (src_array_format == RGBA32_UINT &&
diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 85f7b6b5664..816f12bf9e2 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -131,11 +131,14 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname,
internalformat, buffer);
break;
case GL_NUM_SAMPLE_COUNTS: {
- if (_mesa_is_gles3(ctx) && _mesa_is_enum_format_integer(internalformat)) {
+ if ((ctx->API == API_OPENGLES2 && ctx->Version == 30) &&
+ _mesa_is_enum_format_integer(internalformat)) {
/* From GL ES 3.0 specification, section 6.1.15 page 236: "Since
* multisampling is not supported for signed and unsigned integer
* internal formats, the value of NUM_SAMPLE_COUNTS will be zero
* for such formats.
+ *
+ * Such a restriction no longer exists in GL ES 3.1.
*/
buffer[0] = 0;
count = 1;
diff --git a/src/mesa/main/genmipmap.c b/src/mesa/main/genmipmap.c
index 4ec8385ec2f..6c2d31dbcf3 100644
--- a/src/mesa/main/genmipmap.c
+++ b/src/mesa/main/genmipmap.c
@@ -123,7 +123,7 @@ _mesa_generate_texture_mipmap(struct gl_context *ctx,
GLuint face;
for (face = 0; face < 6; face++) {
ctx->Driver.GenerateMipmap(ctx,
- GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB + face, texObj);
+ GL_TEXTURE_CUBE_MAP_POSITIVE_X + face, texObj);
}
}
else {
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 9005dc5897d..f40c5705813 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -642,7 +642,7 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_3D:
- case GL_TEXTURE_CUBE_MAP_ARB:
+ case GL_TEXTURE_CUBE_MAP:
case GL_TEXTURE_RECTANGLE_NV:
case GL_TEXTURE_EXTERNAL_OES:
v->value_bool = _mesa_IsEnabled(d->pname);
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index 164095c103c..07d2d20df7a 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -455,13 +455,13 @@ descriptor=[
# GL_ARB_compute_shader / GLES 3.1
[ "MAX_COMPUTE_WORK_GROUP_INVOCATIONS", "CONTEXT_INT(Const.MaxComputeWorkGroupInvocations), extra_ARB_compute_shader_es31" ],
- [ "MAX_COMPUTE_UNIFORM_BLOCKS", "CONST(MAX_COMPUTE_UNIFORM_BLOCKS), extra_ARB_compute_shader_es31" ],
- [ "MAX_COMPUTE_TEXTURE_IMAGE_UNITS", "CONST(MAX_COMPUTE_TEXTURE_IMAGE_UNITS), extra_ARB_compute_shader_es31" ],
- [ "MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS), extra_ARB_compute_shader_es31" ],
- [ "MAX_COMPUTE_ATOMIC_COUNTERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTERS), extra_ARB_compute_shader_es31" ],
- [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONST(MAX_COMPUTE_SHARED_MEMORY_SIZE), extra_ARB_compute_shader_es31" ],
- [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader_es31" ],
- [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxUniformBlocks), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONTEXT_INT(Const.MaxComputeSharedMemorySize), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms), extra_ARB_compute_shader_es31" ],
[ "DISPATCH_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_compute_shader_es31" ],
[ "MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxCombinedUniformComponents), extra_ARB_compute_shader_es31" ],
diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index f5284447b6e..987cd0db45c 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -3153,6 +3153,14 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
}
break;
+ case GL_STENCIL_INDEX:
+ if (!_mesa_has_OES_texture_stencil8(ctx) ||
+ type != GL_UNSIGNED_BYTE ||
+ internalFormat != GL_STENCIL_INDEX8) {
+ return GL_INVALID_OPERATION;
+ }
+ break;
+
case GL_ALPHA:
case GL_LUMINANCE:
case GL_LUMINANCE_ALPHA:
diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c
index 99f253cd373..4d6ab6f2b56 100644
--- a/src/mesa/main/image.c
+++ b/src/mesa/main/image.c
@@ -408,9 +408,7 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height,
const GLint srcStride = _mesa_image_row_stride(unpack, width,
GL_COLOR_INDEX, GL_BITMAP);
GLint row, col;
-
-#define SET_PIXEL(COL, ROW) \
- destBuffer[(ROW) * destStride + (COL)] = onValue;
+ GLubyte *dstRow = destBuffer;
for (row = 0; row < height; row++) {
const GLubyte *src = srcRow;
@@ -421,7 +419,7 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height,
for (col = 0; col < width; col++) {
if (*src & mask) {
- SET_PIXEL(col, row);
+ dstRow[col] = onValue;
}
if (mask == 128U) {
@@ -443,7 +441,7 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height,
for (col = 0; col < width; col++) {
if (*src & mask) {
- SET_PIXEL(col, row);
+ dstRow[col] = onValue;
}
if (mask == 1U) {
@@ -461,9 +459,8 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height,
}
srcRow += srcStride;
+ dstRow += destStride;
} /* row */
-
-#undef SET_PIXEL
}
diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index 230ebbc67f4..14cd58870f7 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -89,7 +89,7 @@ _mesa_align_malloc(size_t bytes, unsigned long alignment)
if (err)
return NULL;
return mem;
-#elif defined(_WIN32) && defined(_MSC_VER)
+#elif defined(_WIN32)
return _aligned_malloc(bytes, alignment);
#else
uintptr_t ptr, buf;
@@ -131,7 +131,7 @@ _mesa_align_calloc(size_t bytes, unsigned long alignment)
}
return mem;
-#elif defined(_WIN32) && defined(_MSC_VER)
+#elif defined(_WIN32)
void *mem;
mem = _aligned_malloc(bytes, alignment);
@@ -178,7 +178,7 @@ _mesa_align_free(void *ptr)
{
#if defined(HAVE_POSIX_MEMALIGN)
free(ptr);
-#elif defined(_WIN32) && defined(_MSC_VER)
+#elif defined(_WIN32)
_aligned_free(ptr);
#else
if (ptr) {
@@ -196,7 +196,7 @@ void *
_mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize,
unsigned long alignment)
{
-#if defined(_WIN32) && defined(_MSC_VER)
+#if defined(_WIN32)
(void) oldSize;
return _aligned_realloc(oldBuffer, newSize, alignment);
#else
diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index ad7af5c1d8c..d96d666e15f 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -83,9 +83,6 @@ typedef union { GLfloat f; GLint i; GLuint u; } fi_type;
#if defined(_MSC_VER)
-#if _MSC_VER < 1800 /* Not req'd on VS2013 and above */
-#define strtoll(p, e, b) _strtoi64(p, e, b)
-#endif /* _MSC_VER < 1800 */
#define strcasecmp(s1, s2) _stricmp(s1, s2)
#endif
/*@}*/
diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c
index 50469956c6e..5a02780b960 100644
--- a/src/mesa/main/mipmap.c
+++ b/src/mesa/main/mipmap.c
@@ -1715,12 +1715,12 @@ _mesa_generate_mipmap_level(GLenum target,
dstWidth, dstData[0]);
break;
case GL_TEXTURE_2D:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
make_2d_mipmap(datatype, comps, border,
srcWidth, srcHeight, srcData[0], srcRowStride,
dstWidth, dstHeight, dstData[0], dstRowStride);
@@ -1838,12 +1838,7 @@ _mesa_prepare_mipmap_level(struct gl_context *ctx,
for (face = 0; face < numFaces; face++) {
struct gl_texture_image *dstImage;
- GLenum target;
-
- if (numFaces == 1)
- target = texObj->Target;
- else
- target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + face;
+ const GLenum target = _mesa_cube_face_target(texObj->Target, face);
dstImage = _mesa_get_tex_image(ctx, texObj, target, level);
if (!dstImage) {
@@ -2024,7 +2019,7 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
/* only two types of compressed textures at this time */
assert(texObj->Target == GL_TEXTURE_2D ||
texObj->Target == GL_TEXTURE_2D_ARRAY ||
- texObj->Target == GL_TEXTURE_CUBE_MAP_ARB ||
+ texObj->Target == GL_TEXTURE_CUBE_MAP ||
texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY);
/*
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index a66b56c62bf..2ca9cbf808a 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -44,6 +44,7 @@
#include "math/m_matrix.h" /* GLmatrix */
#include "compiler/shader_enums.h"
#include "main/formats.h" /* MESA_FORMAT_COUNT */
+#include "compiler/glsl/list.h"
#ifdef __cplusplus
@@ -1872,6 +1873,8 @@ typedef enum
PROGRAM_UNDEFINED, /**< Invalid/TBD value */
PROGRAM_IMMEDIATE, /**< Immediate value, used by TGSI */
PROGRAM_BUFFER, /**< for shader buffers, compile-time only */
+ PROGRAM_MEMORY, /**< for shared, global and local memory */
+ PROGRAM_IMAGE, /**< for shader images, compile-time only */
PROGRAM_FILE_MAX
} gl_register_file;
@@ -2044,6 +2047,11 @@ struct gl_compute_program
* Size specified using local_size_{x,y,z}.
*/
unsigned LocalSize[3];
+
+ /**
+ * Size of shared variables accessed by the compute shader.
+ */
+ unsigned SharedSize;
};
@@ -2769,6 +2777,13 @@ struct gl_shader_program
struct gl_uniform_storage **UniformRemapTable;
/**
+ * Sometimes there are empty slots left over in UniformRemapTable after we
+ * allocate slots to explicit locations. This list stores the blocks of
+ * continuous empty slots inside UniformRemapTable.
+ */
+ struct exec_list EmptyUniformLocations;
+
+ /**
* Size of the gl_ClipDistance array that is output from the last pipeline
* stage before the fragment shader.
*/
@@ -3044,6 +3059,7 @@ struct gl_shared_state
mtx_t Mutex; /**< for thread safety */
GLint RefCount; /**< Reference count */
struct _mesa_HashTable *DisplayList; /**< Display lists hash table */
+ struct _mesa_HashTable *BitmapAtlas; /**< For optimized glBitmap text */
struct _mesa_HashTable *TexObjects; /**< Texture objects hash table */
/** Default texture objects (shared by all texture units) */
@@ -3727,6 +3743,7 @@ struct gl_constants
GLuint MaxComputeWorkGroupCount[3]; /* Array of x, y, z dimensions */
GLuint MaxComputeWorkGroupSize[3]; /* Array of x, y, z dimensions */
GLuint MaxComputeWorkGroupInvocations;
+ GLuint MaxComputeSharedMemorySize;
/** GL_ARB_gpu_shader5 */
GLfloat MinFragmentInterpolationOffset;
diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c
index 0e4a0af0b0f..af17be2a3f4 100644
--- a/src/mesa/main/pipelineobj.c
+++ b/src/mesa/main/pipelineobj.c
@@ -964,8 +964,5 @@ _mesa_GetProgramPipelineInfoLog(GLuint pipeline, GLsizei bufSize,
return;
}
- if (pipe->InfoLog)
- _mesa_copy_string(infoLog, bufSize, length, pipe->InfoLog);
- else
- *length = 0;
+ _mesa_copy_string(infoLog, bufSize, length, pipe->InfoLog);
}
diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c
index 50659b0cd21..0d9f8aecf08 100644
--- a/src/mesa/main/program_resource.c
+++ b/src/mesa/main/program_resource.c
@@ -357,10 +357,6 @@ _mesa_GetProgramResourceiv(GLuint program, GLenum programInterface,
return;
}
- /* No need to write any properties, user requested none. */
- if (bufSize == 0)
- return;
-
_mesa_get_program_resourceiv(shProg, programInterface, index,
propCount, props, bufSize, length, params);
}
diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index 470182ab23d..882d863c1c7 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -582,7 +582,7 @@ read_rgba_pixels( struct gl_context *ctx,
void *luminance;
uint32_t luminance_format;
- luminance_stride = width * sizeof(GL_FLOAT);
+ luminance_stride = width * sizeof(GLfloat);
if (format == GL_LUMINANCE_ALPHA)
luminance_stride *= 2;
luminance_bytes = height * luminance_stride;
diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c
index fe15508696e..ca366d967ab 100644
--- a/src/mesa/main/samplerobj.c
+++ b/src/mesa/main/samplerobj.c
@@ -1518,7 +1518,8 @@ _mesa_GetSamplerParameterIiv(GLuint sampler, GLenum pname, GLint *params)
sampObj = _mesa_lookup_samplerobj(ctx, sampler);
if (!sampObj) {
- _mesa_error(ctx, GL_INVALID_VALUE,
+ _mesa_error(ctx, (_mesa_is_gles(ctx) ?
+ GL_INVALID_OPERATION : GL_INVALID_VALUE),
"glGetSamplerParameterIiv(sampler %u)",
sampler);
return;
@@ -1593,7 +1594,8 @@ _mesa_GetSamplerParameterIuiv(GLuint sampler, GLenum pname, GLuint *params)
sampObj = _mesa_lookup_samplerobj(ctx, sampler);
if (!sampObj) {
- _mesa_error(ctx, GL_INVALID_VALUE,
+ _mesa_error(ctx, (_mesa_is_gles(ctx) ?
+ GL_INVALID_OPERATION : GL_INVALID_VALUE),
"glGetSamplerParameterIuiv(sampler %u)",
sampler);
return;
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index df92c0912af..cdf15b48a0d 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -2124,6 +2124,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type,
int i;
for (i = 0; i < 3; i++)
dst_cp->LocalSize[i] = src->Comp.LocalSize[i];
+ dst_cp->SharedSize = src->Comp.SharedSize;
break;
}
default:
diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c
index b9f7bb65fb6..49e5f028045 100644
--- a/src/mesa/main/shared.c
+++ b/src/mesa/main/shared.c
@@ -65,6 +65,7 @@ _mesa_alloc_shared_state(struct gl_context *ctx)
mtx_init(&shared->Mutex, mtx_plain);
shared->DisplayList = _mesa_NewHashTable();
+ shared->BitmapAtlas = _mesa_NewHashTable();
shared->TexObjects = _mesa_NewHashTable();
shared->Programs = _mesa_NewHashTable();
@@ -144,6 +145,18 @@ delete_displaylist_cb(GLuint id, void *data, void *userData)
/**
+ * Callback for deleting a bitmap atlas. Called by _mesa_HashDeleteAll().
+ */
+static void
+delete_bitmap_atlas_cb(GLuint id, void *data, void *userData)
+{
+ struct gl_bitmap_atlas *atlas = (struct gl_bitmap_atlas *) data;
+ struct gl_context *ctx = (struct gl_context *) userData;
+ _mesa_delete_bitmap_atlas(ctx, atlas);
+}
+
+
+/**
* Callback for deleting a texture object. Called by _mesa_HashDeleteAll().
*/
static void
@@ -309,6 +322,8 @@ free_shared_state(struct gl_context *ctx, struct gl_shared_state *shared)
*/
_mesa_HashDeleteAll(shared->DisplayList, delete_displaylist_cb, ctx);
_mesa_DeleteHashTable(shared->DisplayList);
+ _mesa_HashDeleteAll(shared->BitmapAtlas, delete_bitmap_atlas_cb, ctx);
+ _mesa_DeleteHashTable(shared->BitmapAtlas);
_mesa_HashWalk(shared->ShaderObjects, free_shader_program_data_cb, ctx);
_mesa_HashDeleteAll(shared->ShaderObjects, delete_shader_cb, ctx);
diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp
index e6412962251..24e3d189091 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -2436,6 +2436,16 @@ const struct function gles3_functions_possible[] = {
{ "glGetFragDataIndexEXT", 30, -1 },
{ "glBindFragDataLocationEXT", 30, -1 },
+ /* GL_OES_texture_border_clamp */
+ { "glTexParameterIivOES", 30, -1 },
+ { "glTexParameterIuivOES", 30, -1 },
+ { "glGetTexParameterIivOES", 30, -1 },
+ { "glGetTexParameterIuivOES", 30, -1 },
+ { "glSamplerParameterIivOES", 30, -1 },
+ { "glSamplerParameterIuivOES", 30, -1 },
+ { "glGetSamplerParameterIivOES", 30, -1 },
+ { "glGetSamplerParameterIuivOES", 30, -1 },
+
{ NULL, 0, -1 }
};
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index b273aaac2a1..06bc8f1ba15 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -869,12 +869,12 @@ legal_getteximage_target(struct gl_context *ctx, GLenum target, bool dsa)
* the targets from table 8.19 (for GetTexImage and GetnTexImage *only*),
* or TEXTURE_CUBE_MAP (for GetTextureImage *only*)." (Emphasis added.)
*/
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
return dsa ? GL_FALSE : ctx->Extensions.ARB_texture_cube_map;
case GL_TEXTURE_CUBE_MAP:
return dsa ? GL_TRUE : GL_FALSE;
@@ -886,7 +886,7 @@ legal_getteximage_target(struct gl_context *ctx, GLenum target, bool dsa)
/**
* Wrapper for _mesa_select_tex_image() which can handle target being
- * GL_TEXTURE_CUBE_MAP_ARB in which case we use zoffset to select a cube face.
+ * GL_TEXTURE_CUBE_MAP in which case we use zoffset to select a cube face.
* This can happen for glGetTextureImage and glGetTextureSubImage (DSA
* functions).
*/
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 50141be8693..8a4c6286cbe 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -116,20 +116,6 @@ adjust_for_oes_float_texture(GLenum format, GLenum type)
return format;
}
-/**
- * For cube map faces, return a face index in [0,5].
- * For other targets return 0;
- */
-GLuint
-_mesa_tex_target_to_face(GLenum target)
-{
- if (_mesa_is_cube_face(target))
- return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- else
- return 0;
-}
-
-
/**
* Install gl_texture_image in a gl_texture_object according to the target
@@ -273,15 +259,15 @@ proxy_target(GLenum target)
case GL_TEXTURE_3D:
case GL_PROXY_TEXTURE_3D:
return GL_PROXY_TEXTURE_3D;
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_ARB:
- case GL_PROXY_TEXTURE_CUBE_MAP_ARB:
- return GL_PROXY_TEXTURE_CUBE_MAP_ARB;
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+ case GL_TEXTURE_CUBE_MAP:
+ case GL_PROXY_TEXTURE_CUBE_MAP:
+ return GL_PROXY_TEXTURE_CUBE_MAP;
case GL_TEXTURE_RECTANGLE_NV:
case GL_PROXY_TEXTURE_RECTANGLE_NV:
return GL_PROXY_TEXTURE_RECTANGLE_NV;
@@ -472,13 +458,13 @@ _mesa_max_texture_levels(struct gl_context *ctx, GLenum target)
case GL_PROXY_TEXTURE_3D:
return ctx->Const.Max3DTextureLevels;
case GL_TEXTURE_CUBE_MAP:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
- case GL_PROXY_TEXTURE_CUBE_MAP_ARB:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+ case GL_PROXY_TEXTURE_CUBE_MAP:
return ctx->Extensions.ARB_texture_cube_map
? ctx->Const.MaxCubeTextureLevels : 0;
case GL_TEXTURE_RECTANGLE_NV:
@@ -1016,7 +1002,7 @@ _mesa_legal_texture_dimensions(struct gl_context *ctx, GLenum target,
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- case GL_PROXY_TEXTURE_CUBE_MAP_ARB:
+ case GL_PROXY_TEXTURE_CUBE_MAP:
maxSize = 1 << (ctx->Const.MaxCubeTextureLevels - 1);
maxSize >>= level;
if (width != height)
@@ -2299,8 +2285,10 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions,
}
if (baseFormat == GL_DEPTH_COMPONENT ||
baseFormat == GL_DEPTH_STENCIL ||
+ baseFormat == GL_STENCIL_INDEX ||
rb_base_format == GL_DEPTH_COMPONENT ||
rb_base_format == GL_DEPTH_STENCIL ||
+ rb_base_format == GL_STENCIL_INDEX ||
((baseFormat == GL_LUMINANCE_ALPHA ||
baseFormat == GL_ALPHA) &&
rb_base_format != GL_RGBA) ||
diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h
index 5df36c59a28..17f2c908ecc 100644
--- a/src/mesa/main/teximage.h
+++ b/src/mesa/main/teximage.h
@@ -43,10 +43,63 @@ extern "C" {
static inline GLboolean
_mesa_is_cube_face(GLenum target)
{
- return (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB &&
- target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB);
+ return (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X &&
+ target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z);
}
+
+/**
+ * Return number of faces for a texture target. This will be 6 for
+ * cube maps and 1 otherwise.
+ * NOTE: this function is not used for cube map arrays which operate
+ * more like 2D arrays than cube maps.
+ */
+static inline GLuint
+_mesa_num_tex_faces(GLenum target)
+{
+ switch (target) {
+ case GL_TEXTURE_CUBE_MAP:
+ case GL_PROXY_TEXTURE_CUBE_MAP:
+ return 6;
+ default:
+ return 1;
+ }
+}
+
+
+/**
+ * If the target is GL_TEXTURE_CUBE_MAP, return one of the
+ * GL_TEXTURE_CUBE_MAP_POSITIVE/NEGATIVE_X/Y/Z targets corresponding to
+ * the face parameter.
+ * Else, return target as-is.
+ */
+static inline GLenum
+_mesa_cube_face_target(GLenum target, unsigned face)
+{
+ if (target == GL_TEXTURE_CUBE_MAP) {
+ assert(face < 6);
+ return GL_TEXTURE_CUBE_MAP_POSITIVE_X + face;
+ }
+ else {
+ return target;
+ }
+}
+
+
+/**
+ * For cube map faces, return a face index in [0,5].
+ * For other targets return 0;
+ */
+static inline GLuint
+_mesa_tex_target_to_face(GLenum target)
+{
+ if (_mesa_is_cube_face(target))
+ return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+ else
+ return 0;
+}
+
+
/** Are any of the dimensions of given texture equal to zero? */
static inline GLboolean
_mesa_is_zero_size_texture(const struct gl_texture_image *texImage)
@@ -131,9 +184,6 @@ extern GLboolean
_mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target,
GLenum intFormat, GLenum *error);
-extern GLuint
-_mesa_tex_target_to_face(GLenum target);
-
extern GLint
_mesa_get_texture_dimensions(GLenum target);
diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
index e926c7b6cd2..d8407f04340 100644
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -171,16 +171,16 @@ _mesa_get_current_tex_object(struct gl_context *ctx, GLenum target)
return texUnit->CurrentTex[TEXTURE_3D_INDEX];
case GL_PROXY_TEXTURE_3D:
return ctx->Texture.ProxyTex[TEXTURE_3D_INDEX];
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_ARB:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+ case GL_TEXTURE_CUBE_MAP:
return ctx->Extensions.ARB_texture_cube_map
? texUnit->CurrentTex[TEXTURE_CUBE_INDEX] : NULL;
- case GL_PROXY_TEXTURE_CUBE_MAP_ARB:
+ case GL_PROXY_TEXTURE_CUBE_MAP:
return ctx->Extensions.ARB_texture_cube_map
? ctx->Texture.ProxyTex[TEXTURE_CUBE_INDEX] : NULL;
case GL_TEXTURE_CUBE_MAP_ARRAY:
@@ -239,7 +239,7 @@ _mesa_get_current_tex_object(struct gl_context *ctx, GLenum target)
* \param shared the shared GL state structure to contain the texture object
* \param name integer name for the texture object
* \param target either GL_TEXTURE_1D, GL_TEXTURE_2D, GL_TEXTURE_3D,
- * GL_TEXTURE_CUBE_MAP_ARB or GL_TEXTURE_RECTANGLE_NV. zero is ok for the sake
+ * GL_TEXTURE_CUBE_MAP or GL_TEXTURE_RECTANGLE_NV. zero is ok for the sake
* of GenTextures()
*
* \return pointer to new texture object.
@@ -270,7 +270,7 @@ _mesa_initialize_texture_object( struct gl_context *ctx,
target == GL_TEXTURE_1D ||
target == GL_TEXTURE_2D ||
target == GL_TEXTURE_3D ||
- target == GL_TEXTURE_CUBE_MAP_ARB ||
+ target == GL_TEXTURE_CUBE_MAP ||
target == GL_TEXTURE_RECTANGLE_NV ||
target == GL_TEXTURE_1D_ARRAY_EXT ||
target == GL_TEXTURE_2D_ARRAY_EXT ||
@@ -513,7 +513,7 @@ valid_texture_object(const struct gl_texture_object *tex)
case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_3D:
- case GL_TEXTURE_CUBE_MAP_ARB:
+ case GL_TEXTURE_CUBE_MAP:
case GL_TEXTURE_RECTANGLE_NV:
case GL_TEXTURE_1D_ARRAY_EXT:
case GL_TEXTURE_2D_ARRAY_EXT:
@@ -725,7 +725,7 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx,
case GL_TEXTURE_3D:
maxLevels = ctx->Const.Max3DTextureLevels;
break;
- case GL_TEXTURE_CUBE_MAP_ARB:
+ case GL_TEXTURE_CUBE_MAP:
case GL_TEXTURE_CUBE_MAP_ARRAY:
maxLevels = ctx->Const.MaxCubeTextureLevels;
break;
@@ -768,7 +768,7 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx,
return;
}
- if (t->Target == GL_TEXTURE_CUBE_MAP_ARB) {
+ if (t->Target == GL_TEXTURE_CUBE_MAP) {
/* Make sure that all six cube map level 0 images are the same size and
* format.
* Note: we know that the image's width==height (we enforce that
@@ -1036,12 +1036,7 @@ _mesa_get_fallback_texture(struct gl_context *ctx, gl_texture_index tex)
/* need a loop here just for cube maps */
for (face = 0; face < numFaces; face++) {
- GLenum faceTarget;
-
- if (target == GL_TEXTURE_CUBE_MAP)
- faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + face;
- else
- faceTarget = target;
+ const GLenum faceTarget = _mesa_cube_face_target(target, face);
/* initialize level[0] texture image */
texImage = _mesa_get_tex_image(ctx, texObj, faceTarget, 0);
diff --git a/src/mesa/main/texobj.h b/src/mesa/main/texobj.h
index 8421337de4d..378d87a1cbb 100644
--- a/src/mesa/main/texobj.h
+++ b/src/mesa/main/texobj.h
@@ -120,25 +120,6 @@ _mesa_unlock_texture(struct gl_context *ctx, struct gl_texture_object *texObj)
}
-/**
- * Return number of faces for a texture target. This will be 6 for
- * cube maps (and cube map arrays) and 1 otherwise.
- * NOTE: this function is not used for cube map arrays which operate
- * more like 2D arrays than cube maps.
- */
-static inline GLuint
-_mesa_num_tex_faces(GLenum target)
-{
- switch (target) {
- case GL_TEXTURE_CUBE_MAP:
- case GL_PROXY_TEXTURE_CUBE_MAP:
- return 6;
- default:
- return 1;
- }
-}
-
-
/** Is the texture "complete" with respect to the given sampler state? */
static inline GLboolean
_mesa_is_texture_complete(const struct gl_texture_object *texObj,
diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 89f286cc05e..20770a77e15 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -72,7 +72,7 @@ validate_texture_wrap_mode(struct gl_context * ctx, GLenum target, GLenum wrap)
break;
case GL_CLAMP_TO_BORDER:
- supported = is_desktop_gl && e->ARB_texture_border_clamp
+ supported = ctx->API != API_OPENGLES && e->ARB_texture_border_clamp
&& (target != GL_TEXTURE_EXTERNAL_OES);
break;
@@ -500,9 +500,7 @@ set_tex_parameteri(struct gl_context *ctx,
goto invalid_pname;
case GL_DEPTH_STENCIL_TEXTURE_MODE:
- if ((_mesa_is_desktop_gl(ctx) &&
- ctx->Extensions.ARB_stencil_texturing) ||
- _mesa_is_gles31(ctx)) {
+ if (_mesa_has_ARB_stencil_texturing(ctx) || _mesa_is_gles31(ctx)) {
bool stencil = params[0] == GL_STENCIL_INDEX;
if (!stencil && params[0] != GL_DEPTH_COMPONENT)
goto invalid_param;
@@ -719,7 +717,8 @@ set_tex_parameterf(struct gl_context *ctx,
break;
case GL_TEXTURE_BORDER_COLOR:
- if (!_mesa_is_desktop_gl(ctx))
+ if (ctx->API == API_OPENGLES ||
+ !ctx->Extensions.ARB_texture_border_clamp)
goto invalid_pname;
if (!target_allows_setting_sampler_parameters(texObj->Target))
@@ -1215,12 +1214,12 @@ legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target,
return GL_TRUE;
case GL_TEXTURE_2D_ARRAY_EXT:
return ctx->Extensions.EXT_texture_array;
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
return ctx->Extensions.ARB_texture_cube_map;
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
@@ -1237,7 +1236,7 @@ legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target,
case GL_PROXY_TEXTURE_2D:
case GL_PROXY_TEXTURE_3D:
return GL_TRUE;
- case GL_PROXY_TEXTURE_CUBE_MAP_ARB:
+ case GL_PROXY_TEXTURE_CUBE_MAP:
return ctx->Extensions.ARB_texture_cube_map;
case GL_TEXTURE_CUBE_MAP_ARRAY_ARB:
case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY_ARB:
@@ -1312,6 +1311,7 @@ get_tex_level_parameter_image(struct gl_context *ctx,
dummy_image.TexFormat = MESA_FORMAT_NONE;
dummy_image.InternalFormat = GL_RGBA;
dummy_image._BaseFormat = GL_NONE;
+ dummy_image.FixedSampleLocations = GL_TRUE;
img = &dummy_image;
}
@@ -1736,7 +1736,8 @@ get_tex_parameterfv(struct gl_context *ctx,
*params = ENUM_TO_FLOAT(obj->Sampler.WrapR);
break;
case GL_TEXTURE_BORDER_COLOR:
- if (!_mesa_is_desktop_gl(ctx))
+ if (ctx->API == API_OPENGLES ||
+ !ctx->Extensions.ARB_texture_border_clamp)
goto invalid_pname;
if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP))
@@ -1819,7 +1820,7 @@ get_tex_parameterfv(struct gl_context *ctx,
*params = (GLfloat) obj->DepthMode;
break;
case GL_DEPTH_STENCIL_TEXTURE_MODE:
- if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_stencil_texturing)
+ if (!_mesa_has_ARB_stencil_texturing(ctx) && !_mesa_is_gles31(ctx))
goto invalid_pname;
*params = (GLfloat)
(obj->StencilSampling ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT);
@@ -1970,7 +1971,8 @@ get_tex_parameteriv(struct gl_context *ctx,
*params = (GLint) obj->Sampler.WrapR;
break;
case GL_TEXTURE_BORDER_COLOR:
- if (!_mesa_is_desktop_gl(ctx))
+ if (ctx->API == API_OPENGLES ||
+ !ctx->Extensions.ARB_texture_border_clamp)
goto invalid_pname;
{
@@ -2054,7 +2056,7 @@ get_tex_parameteriv(struct gl_context *ctx,
*params = (GLint) obj->DepthMode;
break;
case GL_DEPTH_STENCIL_TEXTURE_MODE:
- if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_stencil_texturing)
+ if (!_mesa_has_ARB_stencil_texturing(ctx) && !_mesa_is_gles31(ctx))
goto invalid_pname;
*params = (GLint)
(obj->StencilSampling ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT);
diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c
index 9d88554d945..9ee5c6974df 100644
--- a/src/mesa/main/texstate.c
+++ b/src/mesa/main/texstate.c
@@ -786,7 +786,7 @@ alloc_proxy_textures( struct gl_context *ctx )
GL_TEXTURE_2D_ARRAY_EXT,
GL_TEXTURE_1D_ARRAY_EXT,
GL_TEXTURE_EXTERNAL_OES,
- GL_TEXTURE_CUBE_MAP_ARB,
+ GL_TEXTURE_CUBE_MAP,
GL_TEXTURE_3D,
GL_TEXTURE_RECTANGLE_NV,
GL_TEXTURE_2D,
diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index d7671738b18..c33b1095900 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -87,9 +87,6 @@ enum {
* Texture image storage function.
*/
typedef GLboolean (*StoreTexImageFunc)(TEXSTORE_PARAMS);
-static const GLubyte map_identity[6] = { 0, 1, 2, 3, ZERO, ONE };
-static const GLubyte map_3210[6] = { 3, 2, 1, 0, ZERO, ONE };
-static const GLubyte map_1032[6] = { 1, 0, 3, 2, ZERO, ONE };
/**
diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c
index 04b7d73da5c..316d8280338 100644
--- a/src/mesa/main/textureview.c
+++ b/src/mesa/main/textureview.c
@@ -211,10 +211,7 @@ initialize_texture_fields(struct gl_context *ctx,
for (level = 0; level < levels; level++) {
for (face = 0; face < numFaces; face++) {
struct gl_texture_image *texImage;
- GLenum faceTarget = target;
-
- if (target == GL_TEXTURE_CUBE_MAP)
- faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + face;
+ const GLenum faceTarget = _mesa_cube_face_target(target, face);
texImage = _mesa_get_tex_image(ctx, texObj, faceTarget, level);
@@ -536,9 +533,7 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture,
newViewNumLevels = MIN2(numlevels, origTexObj->NumLevels - minlevel);
newViewNumLayers = MIN2(numlayers, origTexObj->NumLayers - minlayer);
- faceTarget = origTexObj->Target;
- if (faceTarget == GL_TEXTURE_CUBE_MAP)
- faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + minlayer;
+ faceTarget = _mesa_cube_face_target(origTexObj->Target, minlayer);
/* Get a reference to what will become this View's base level */
origTexImage = _mesa_select_tex_image(origTexObj, faceTarget, minlevel);
diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c
index eed241271df..db53377d705 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -40,7 +40,7 @@
#include "prog_statevars.h"
#include "prog_parameter.h"
#include "main/samplerobj.h"
-#include "framebuffer.h"
+#include "main/framebuffer.h"
#define ONE_DIV_SQRT_LN2 (1.201122408786449815)
@@ -353,7 +353,7 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[],
}
return;
case STATE_NUM_SAMPLES:
- ((int *)value)[0] = _mesa_geometric_samples(ctx->DrawBuffer);
+ ((int *)value)[0] = MAX2(1, _mesa_geometric_samples(ctx->DrawBuffer));
return;
case STATE_DEPTH_RANGE:
value[0] = ctx->ViewportArray[0].Near; /* near */
diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index 4b89ade1b15..622621bdcbb 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -38,9 +38,9 @@
/**
- * This is used to initialize st->atoms[].
+ * This is used to initialize st->render_atoms[].
*/
-static const struct st_tracked_state *atoms[] =
+static const struct st_tracked_state *render_atoms[] =
{
&st_update_depth_stencil_alpha,
&st_update_clip,
@@ -85,6 +85,11 @@ static const struct st_tracked_state *atoms[] =
&st_bind_tes_ssbos,
&st_bind_fs_ssbos,
&st_bind_gs_ssbos,
+ &st_bind_vs_images,
+ &st_bind_tcs_images,
+ &st_bind_tes_images,
+ &st_bind_gs_images,
+ &st_bind_fs_images,
&st_update_pixel_transfer,
&st_update_tess,
@@ -93,6 +98,22 @@ static const struct st_tracked_state *atoms[] =
};
+/**
+ * This is used to initialize st->compute_atoms[].
+ */
+static const struct st_tracked_state *compute_atoms[] =
+{
+ &st_update_cp,
+ &st_update_compute_texture,
+ &st_update_sampler, /* depends on update_compute_texture for swizzle */
+ &st_update_cs_constants,
+ &st_bind_cs_ubos,
+ &st_bind_cs_atomics,
+ &st_bind_cs_ssbos,
+ &st_bind_cs_images,
+};
+
+
void st_init_atoms( struct st_context *st )
{
/* no-op */
@@ -178,20 +199,41 @@ static void check_attrib_edgeflag(struct st_context *st)
* Update all derived state:
*/
-void st_validate_state( struct st_context *st )
+void st_validate_state( struct st_context *st, enum st_pipeline pipeline )
{
- struct st_state_flags *state = &st->dirty;
+ const struct st_tracked_state **atoms;
+ struct st_state_flags *state;
+ GLuint num_atoms;
GLuint i;
+ /* Get pipeline state. */
+ switch (pipeline) {
+ case ST_PIPELINE_RENDER:
+ atoms = render_atoms;
+ num_atoms = ARRAY_SIZE(render_atoms);
+ state = &st->dirty;
+ break;
+ case ST_PIPELINE_COMPUTE:
+ atoms = compute_atoms;
+ num_atoms = ARRAY_SIZE(compute_atoms);
+ state = &st->dirty_cp;
+ break;
+ default:
+ unreachable("Invalid pipeline specified");
+ }
+
/* Get Mesa driver state. */
st->dirty.st |= st->ctx->NewDriverState;
+ st->dirty_cp.st |= st->ctx->NewDriverState;
st->ctx->NewDriverState = 0;
- check_attrib_edgeflag(st);
+ if (pipeline == ST_PIPELINE_RENDER) {
+ check_attrib_edgeflag(st);
- check_program_state( st );
+ check_program_state(st);
- st_manager_validate_framebuffers(st);
+ st_manager_validate_framebuffers(st);
+ }
if (state->st == 0 && state->mesa == 0)
return;
@@ -211,7 +253,7 @@ void st_validate_state( struct st_context *st )
memset(&examined, 0, sizeof(examined));
prev = *state;
- for (i = 0; i < ARRAY_SIZE(atoms); i++) {
+ for (i = 0; i < num_atoms; i++) {
const struct st_tracked_state *atom = atoms[i];
struct st_state_flags generated;
@@ -242,7 +284,7 @@ void st_validate_state( struct st_context *st )
}
else {
- for (i = 0; i < ARRAY_SIZE(atoms); i++) {
+ for (i = 0; i < num_atoms; i++) {
if (check_state(state, &atoms[i]->dirty))
atoms[i]->update( st );
}
diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h
index 3a9153c80cb..31bb2dd98bf 100644
--- a/src/mesa/state_tracker/st_atom.h
+++ b/src/mesa/state_tracker/st_atom.h
@@ -36,6 +36,9 @@
#include "main/glheader.h"
+#include "state_tracker/st_api.h"
+#include "state_tracker/st_context.h"
+
struct st_context;
struct st_tracked_state;
@@ -43,7 +46,7 @@ void st_init_atoms( struct st_context *st );
void st_destroy_atoms( struct st_context *st );
-void st_validate_state( struct st_context *st );
+void st_validate_state( struct st_context *st, enum st_pipeline pipeline );
extern const struct st_tracked_state st_update_array;
@@ -55,6 +58,7 @@ extern const struct st_tracked_state st_update_gp;
extern const struct st_tracked_state st_update_tep;
extern const struct st_tracked_state st_update_tcp;
extern const struct st_tracked_state st_update_vp;
+extern const struct st_tracked_state st_update_cp;
extern const struct st_tracked_state st_update_rasterizer;
extern const struct st_tracked_state st_update_polygon_stipple;
extern const struct st_tracked_state st_update_viewport;
@@ -68,26 +72,37 @@ extern const struct st_tracked_state st_update_vertex_texture;
extern const struct st_tracked_state st_update_geometry_texture;
extern const struct st_tracked_state st_update_tessctrl_texture;
extern const struct st_tracked_state st_update_tesseval_texture;
+extern const struct st_tracked_state st_update_compute_texture;
extern const struct st_tracked_state st_update_fs_constants;
extern const struct st_tracked_state st_update_gs_constants;
extern const struct st_tracked_state st_update_tes_constants;
extern const struct st_tracked_state st_update_tcs_constants;
extern const struct st_tracked_state st_update_vs_constants;
+extern const struct st_tracked_state st_update_cs_constants;
extern const struct st_tracked_state st_bind_fs_ubos;
extern const struct st_tracked_state st_bind_vs_ubos;
extern const struct st_tracked_state st_bind_gs_ubos;
extern const struct st_tracked_state st_bind_tcs_ubos;
extern const struct st_tracked_state st_bind_tes_ubos;
+extern const struct st_tracked_state st_bind_cs_ubos;
extern const struct st_tracked_state st_bind_fs_atomics;
extern const struct st_tracked_state st_bind_vs_atomics;
extern const struct st_tracked_state st_bind_gs_atomics;
extern const struct st_tracked_state st_bind_tcs_atomics;
extern const struct st_tracked_state st_bind_tes_atomics;
+extern const struct st_tracked_state st_bind_cs_atomics;
extern const struct st_tracked_state st_bind_fs_ssbos;
extern const struct st_tracked_state st_bind_vs_ssbos;
extern const struct st_tracked_state st_bind_gs_ssbos;
extern const struct st_tracked_state st_bind_tcs_ssbos;
extern const struct st_tracked_state st_bind_tes_ssbos;
+extern const struct st_tracked_state st_bind_cs_ssbos;
+extern const struct st_tracked_state st_bind_fs_images;
+extern const struct st_tracked_state st_bind_vs_images;
+extern const struct st_tracked_state st_bind_gs_images;
+extern const struct st_tracked_state st_bind_tcs_images;
+extern const struct st_tracked_state st_bind_tes_images;
+extern const struct st_tracked_state st_bind_cs_images;
extern const struct st_tracked_state st_update_pixel_transfer;
extern const struct st_tracked_state st_update_tess;
diff --git a/src/mesa/state_tracker/st_atom_atomicbuf.c b/src/mesa/state_tracker/st_atom_atomicbuf.c
index 1c30d1fb701..a27dbe0460d 100644
--- a/src/mesa/state_tracker/st_atom_atomicbuf.c
+++ b/src/mesa/state_tracker/st_atom_atomicbuf.c
@@ -58,9 +58,11 @@ st_bind_atomics(struct st_context *st,
st_buffer_object(binding->BufferObject);
struct pipe_shader_buffer sb = { 0 };
- sb.buffer = st_obj->buffer;
- sb.buffer_offset = binding->Offset;
- sb.buffer_size = st_obj->buffer->width0 - binding->Offset;
+ if (st_obj && st_obj->buffer) {
+ sb.buffer = st_obj->buffer;
+ sb.buffer_offset = binding->Offset;
+ sb.buffer_size = st_obj->buffer->width0 - binding->Offset;
+ }
st->pipe->set_shader_buffers(st->pipe, shader_type,
atomic->Binding, 1, &sb);
@@ -156,3 +158,21 @@ const struct st_tracked_state st_bind_tes_atomics = {
},
bind_tes_atomics
};
+
+static void
+bind_cs_atomics(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+
+ st_bind_atomics(st, prog, PIPE_SHADER_COMPUTE);
+}
+
+const struct st_tracked_state st_bind_cs_atomics = {
+ "st_bind_cs_atomics",
+ {
+ 0,
+ ST_NEW_COMPUTE_PROGRAM | ST_NEW_ATOMIC_BUFFER,
+ },
+ bind_cs_atomics
+};
diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c
index 66811d29c29..407dfd31c80 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -61,7 +61,8 @@ void st_upload_constants( struct st_context *st,
shader_type == PIPE_SHADER_FRAGMENT ||
shader_type == PIPE_SHADER_GEOMETRY ||
shader_type == PIPE_SHADER_TESS_CTRL ||
- shader_type == PIPE_SHADER_TESS_EVAL);
+ shader_type == PIPE_SHADER_TESS_EVAL ||
+ shader_type == PIPE_SHADER_COMPUTE);
/* update constants */
if (params && params->NumParameters) {
@@ -226,6 +227,28 @@ const struct st_tracked_state st_update_tes_constants = {
update_tes_constants /* update */
};
+/* Compute shader:
+ */
+static void update_cs_constants(struct st_context *st )
+{
+ struct st_compute_program *cp = st->cp;
+ struct gl_program_parameter_list *params;
+
+ if (cp) {
+ params = cp->Base.Base.Parameters;
+ st_upload_constants( st, params, PIPE_SHADER_COMPUTE );
+ }
+}
+
+const struct st_tracked_state st_update_cs_constants = {
+ "st_update_cs_constants", /* name */
+ { /* dirty */
+ _NEW_PROGRAM_CONSTANTS, /* mesa */
+ ST_NEW_COMPUTE_PROGRAM, /* st */
+ },
+ update_cs_constants /* update */
+};
+
static void st_bind_ubos(struct st_context *st,
struct gl_shader *shader,
unsigned shader_type)
@@ -363,3 +386,24 @@ const struct st_tracked_state st_bind_tes_ubos = {
},
bind_tes_ubos
};
+
+static void bind_cs_ubos(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+
+ if (!prog)
+ return;
+
+ st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
+ PIPE_SHADER_COMPUTE);
+}
+
+const struct st_tracked_state st_bind_cs_ubos = {
+ "st_bind_cs_ubos",
+ {
+ 0,
+ ST_NEW_COMPUTE_PROGRAM | ST_NEW_UNIFORM_BUFFER,
+ },
+ bind_cs_ubos
+};
diff --git a/src/mesa/state_tracker/st_atom_image.c b/src/mesa/state_tracker/st_atom_image.c
new file mode 100644
index 00000000000..4b48bc30b69
--- /dev/null
+++ b/src/mesa/state_tracker/st_atom_image.c
@@ -0,0 +1,236 @@
+/**************************************************************************
+ *
+ * Copyright 2016 Ilia Mirkin. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "main/imports.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "compiler/glsl/ir_uniform.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_surface.h"
+
+#include "st_cb_texture.h"
+#include "st_debug.h"
+#include "st_texture.h"
+#include "st_context.h"
+#include "st_atom.h"
+#include "st_program.h"
+#include "st_format.h"
+
+static void
+st_bind_images(struct st_context *st, struct gl_shader *shader,
+ unsigned shader_type)
+{
+ unsigned i;
+ struct pipe_image_view images[MAX_IMAGE_UNIFORMS];
+ struct gl_program_constants *c = &st->ctx->Const.Program[shader->Stage];
+
+ if (!shader || !st->pipe->set_shader_images)
+ return;
+
+ for (i = 0; i < shader->NumImages; i++) {
+ struct gl_image_unit *u = &st->ctx->ImageUnits[shader->ImageUnits[i]];
+ struct st_texture_object *stObj = st_texture_object(u->TexObj);
+ struct pipe_image_view *img = &images[i];
+
+ if (!stObj ||
+ !st_finalize_texture(st->ctx, st->pipe, u->TexObj) ||
+ !stObj->pt) {
+ memset(img, 0, sizeof(*img));
+ continue;
+ }
+
+ img->resource = stObj->pt;
+ img->format = st_mesa_format_to_pipe_format(st, u->_ActualFormat);
+ if (stObj->pt->target == PIPE_BUFFER) {
+ unsigned base, size;
+ unsigned f, n;
+ const struct util_format_description *desc
+ = util_format_description(img->format);
+
+ base = stObj->base.BufferOffset;
+ assert(base < stObj->pt->width0);
+ size = MIN2(stObj->pt->width0 - base, (unsigned)stObj->base.BufferSize);
+
+ f = (base / (desc->block.bits / 8)) * desc->block.width;
+ n = (size / (desc->block.bits / 8)) * desc->block.width;
+ assert(n > 0);
+ img->u.buf.first_element = f;
+ img->u.buf.last_element = f + (n - 1);
+ } else {
+ img->u.tex.level = u->Level + stObj->base.MinLevel;
+ if (stObj->pt->target == PIPE_TEXTURE_3D) {
+ if (u->Layered) {
+ img->u.tex.first_layer = 0;
+ img->u.tex.last_layer = u_minify(stObj->pt->depth0, img->u.tex.level) - 1;
+ } else {
+ img->u.tex.first_layer = u->_Layer;
+ img->u.tex.last_layer = u->_Layer;
+ }
+ } else {
+ img->u.tex.first_layer = u->_Layer + stObj->base.MinLayer;
+ img->u.tex.last_layer = u->_Layer + stObj->base.MinLayer;
+ if (u->Layered && img->resource->array_size > 1) {
+ if (stObj->base.Immutable)
+ img->u.tex.last_layer += stObj->base.NumLayers - 1;
+ else
+ img->u.tex.last_layer += img->resource->array_size - 1;
+ }
+ }
+ }
+ }
+ st->pipe->set_shader_images(st->pipe, shader_type, 0, shader->NumImages,
+ images);
+ /* clear out any stale shader images */
+ if (shader->NumImages < c->MaxImageUniforms)
+ st->pipe->set_shader_images(
+ st->pipe, shader_type,
+ shader->NumImages,
+ c->MaxImageUniforms - shader->NumImages,
+ NULL);
+}
+
+static void bind_vs_images(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
+
+ if (!prog)
+ return;
+
+ st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_VERTEX], PIPE_SHADER_VERTEX);
+}
+
+const struct st_tracked_state st_bind_vs_images = {
+ "st_bind_vs_images",
+ {
+ 0,
+ ST_NEW_VERTEX_PROGRAM | ST_NEW_IMAGE_UNITS,
+ },
+ bind_vs_images
+};
+
+static void bind_fs_images(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
+
+ if (!prog)
+ return;
+
+ st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_FRAGMENT], PIPE_SHADER_FRAGMENT);
+}
+
+const struct st_tracked_state st_bind_fs_images = {
+ "st_bind_fs_images",
+ {
+ 0,
+ ST_NEW_FRAGMENT_PROGRAM | ST_NEW_IMAGE_UNITS,
+ },
+ bind_fs_images
+};
+
+static void bind_gs_images(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
+
+ if (!prog)
+ return;
+
+ st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_GEOMETRY], PIPE_SHADER_GEOMETRY);
+}
+
+const struct st_tracked_state st_bind_gs_images = {
+ "st_bind_gs_images",
+ {
+ 0,
+ ST_NEW_GEOMETRY_PROGRAM | ST_NEW_IMAGE_UNITS,
+ },
+ bind_gs_images
+};
+
+static void bind_tcs_images(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
+
+ if (!prog)
+ return;
+
+ st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_TESS_CTRL], PIPE_SHADER_TESS_CTRL);
+}
+
+const struct st_tracked_state st_bind_tcs_images = {
+ "st_bind_tcs_images",
+ {
+ 0,
+ ST_NEW_TESSCTRL_PROGRAM | ST_NEW_IMAGE_UNITS,
+ },
+ bind_tcs_images
+};
+
+static void bind_tes_images(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
+
+ if (!prog)
+ return;
+
+ st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_TESS_EVAL], PIPE_SHADER_TESS_EVAL);
+}
+
+const struct st_tracked_state st_bind_tes_images = {
+ "st_bind_tes_images",
+ {
+ 0,
+ ST_NEW_TESSEVAL_PROGRAM | ST_NEW_IMAGE_UNITS,
+ },
+ bind_tes_images
+};
+
+static void bind_cs_images(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+
+ if (!prog)
+ return;
+
+ st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_COMPUTE], PIPE_SHADER_COMPUTE);
+}
+
+const struct st_tracked_state st_bind_cs_images = {
+ "st_bind_cs_images",
+ {
+ 0,
+ ST_NEW_COMPUTE_PROGRAM | ST_NEW_IMAGE_UNITS,
+ },
+ bind_cs_images
+};
diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c
index 94231cf1946..82dcf5ee0ca 100644
--- a/src/mesa/state_tracker/st_atom_sampler.c
+++ b/src/mesa/state_tracker/st_atom_sampler.c
@@ -321,6 +321,14 @@ update_samplers(struct st_context *st)
st->state.samplers[PIPE_SHADER_TESS_EVAL],
&st->state.num_samplers[PIPE_SHADER_TESS_EVAL]);
}
+ if (ctx->ComputeProgram._Current) {
+ update_shader_samplers(st,
+ PIPE_SHADER_COMPUTE,
+ &ctx->ComputeProgram._Current->Base,
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits,
+ st->state.samplers[PIPE_SHADER_COMPUTE],
+ &st->state.num_samplers[PIPE_SHADER_COMPUTE]);
+ }
}
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index c8650a5899a..a88f0352746 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -255,3 +255,35 @@ const struct st_tracked_state st_update_tep = {
},
update_tep /* update */
};
+
+
+
+static void
+update_cp( struct st_context *st )
+{
+ struct st_compute_program *stcp;
+
+ if (!st->ctx->ComputeProgram._Current) {
+ cso_set_compute_shader_handle(st->cso_context, NULL);
+ return;
+ }
+
+ stcp = st_compute_program(st->ctx->ComputeProgram._Current);
+ assert(stcp->Base.Base.Target == GL_COMPUTE_PROGRAM_NV);
+
+ st->cp_variant = st_get_cp_variant(st, &stcp->tgsi, &stcp->variants);
+
+ st_reference_compprog(st, &st->cp, stcp);
+
+ cso_set_compute_shader_handle(st->cso_context,
+ st->cp_variant->driver_shader);
+}
+
+const struct st_tracked_state st_update_cp = {
+ "st_update_cp", /* name */
+ { /* dirty */
+ 0, /* mesa */
+ ST_NEW_COMPUTE_PROGRAM /* st */
+ },
+ update_cp /* update */
+};
diff --git a/src/mesa/state_tracker/st_atom_storagebuf.c b/src/mesa/state_tracker/st_atom_storagebuf.c
index f165cc3e0a1..37b4c4d256d 100644
--- a/src/mesa/state_tracker/st_atom_storagebuf.c
+++ b/src/mesa/state_tracker/st_atom_storagebuf.c
@@ -194,3 +194,24 @@ const struct st_tracked_state st_bind_tes_ssbos = {
},
bind_tes_ssbos
};
+
+static void bind_cs_ssbos(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+
+ if (!prog)
+ return;
+
+ st_bind_ssbos(st, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
+ PIPE_SHADER_COMPUTE);
+}
+
+const struct st_tracked_state st_bind_cs_ssbos = {
+ "st_bind_cs_ssbos",
+ {
+ 0,
+ ST_NEW_COMPUTE_PROGRAM | ST_NEW_STORAGE_BUFFER,
+ },
+ bind_cs_ssbos
+};
diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c
index 3e3775200a8..4b7ad77b47a 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -32,6 +32,7 @@
*/
+#include "main/context.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
@@ -147,9 +148,7 @@ compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode,
else
return SWIZZLE_XYZW;
case GL_STENCIL_INDEX:
- return SWIZZLE_XYZW;
case GL_DEPTH_STENCIL:
- /* fall-through */
case GL_DEPTH_COMPONENT:
/* Now examine the depth mode */
switch (depthMode) {
@@ -193,15 +192,29 @@ compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode,
static unsigned
-get_texture_format_swizzle(const struct st_texture_object *stObj,
+get_texture_format_swizzle(const struct st_context *st,
+ const struct st_texture_object *stObj,
unsigned glsl_version)
{
GLenum baseFormat = _mesa_texture_base_format(&stObj->base);
unsigned tex_swizzle;
if (baseFormat != GL_NONE) {
+ GLenum depth_mode = stObj->base.DepthMode;
+ /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
+ * with depth component data specified with a sized internal format.
+ */
+ if (_mesa_is_gles3(st->ctx) &&
+ util_format_is_depth_or_stencil(stObj->pt->format)) {
+ const struct st_texture_image *firstImage =
+ st_texture_image_const(_mesa_base_tex_image(&stObj->base));
+ if (firstImage->base.InternalFormat != GL_DEPTH_COMPONENT &&
+ firstImage->base.InternalFormat != GL_DEPTH_STENCIL &&
+ firstImage->base.InternalFormat != GL_STENCIL_INDEX)
+ depth_mode = GL_RED;
+ }
tex_swizzle = compute_texture_format_swizzle(baseFormat,
- stObj->base.DepthMode,
+ depth_mode,
stObj->pt->format,
glsl_version);
}
@@ -221,10 +234,11 @@ get_texture_format_swizzle(const struct st_texture_object *stObj,
* \param stObj the st texture object,
*/
static boolean
-check_sampler_swizzle(const struct st_texture_object *stObj,
+check_sampler_swizzle(const struct st_context *st,
+ const struct st_texture_object *stObj,
struct pipe_sampler_view *sv, unsigned glsl_version)
{
- unsigned swizzle = get_texture_format_swizzle(stObj, glsl_version);
+ unsigned swizzle = get_texture_format_swizzle(st, stObj, glsl_version);
return ((sv->swizzle_r != GET_SWZ(swizzle, 0)) ||
(sv->swizzle_g != GET_SWZ(swizzle, 1)) ||
@@ -251,13 +265,13 @@ static unsigned last_layer(struct st_texture_object *stObj)
}
static struct pipe_sampler_view *
-st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe,
+st_create_texture_sampler_view_from_stobj(struct st_context *st,
struct st_texture_object *stObj,
enum pipe_format format,
unsigned glsl_version)
{
struct pipe_sampler_view templ;
- unsigned swizzle = get_texture_format_swizzle(stObj, glsl_version);
+ unsigned swizzle = get_texture_format_swizzle(st, stObj, glsl_version);
u_sampler_view_default_template(&templ,
stObj->pt,
@@ -297,7 +311,7 @@ st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe,
templ.swizzle_a = GET_SWZ(swizzle, 3);
}
- return pipe->create_sampler_view(pipe, stObj->pt, &templ);
+ return st->pipe->create_sampler_view(st->pipe, stObj->pt, &templ);
}
@@ -327,7 +341,7 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st,
/* if sampler view has changed dereference it */
if (*sv) {
- if (check_sampler_swizzle(stObj, *sv, glsl_version) ||
+ if (check_sampler_swizzle(st, stObj, *sv, glsl_version) ||
(format != (*sv)->format) ||
gl_target_to_pipe(stObj->base.Target) != (*sv)->target ||
stObj->base.MinLevel + stObj->base.BaseLevel != (*sv)->u.tex.first_level ||
@@ -339,7 +353,7 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st,
}
if (!*sv) {
- *sv = st_create_texture_sampler_view_from_stobj(st->pipe, stObj,
+ *sv = st_create_texture_sampler_view_from_stobj(st, stObj,
format, glsl_version);
} else if ((*sv)->context != st->pipe) {
@@ -534,6 +548,22 @@ update_tesseval_textures(struct st_context *st)
}
+static void
+update_compute_textures(struct st_context *st)
+{
+ const struct gl_context *ctx = st->ctx;
+
+ if (ctx->ComputeProgram._Current) {
+ update_textures(st,
+ MESA_SHADER_COMPUTE,
+ &ctx->ComputeProgram._Current->Base,
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits,
+ st->state.sampler_views[PIPE_SHADER_COMPUTE],
+ &st->state.num_sampler_views[PIPE_SHADER_COMPUTE]);
+ }
+}
+
+
const struct st_tracked_state st_update_fragment_texture = {
"st_update_texture", /* name */
{ /* dirty */
@@ -582,3 +612,13 @@ const struct st_tracked_state st_update_tesseval_texture = {
},
update_tesseval_textures /* update */
};
+
+
+const struct st_tracked_state st_update_compute_texture = {
+ "st_update_compute_texture", /* name */
+ { /* dirty */
+ _NEW_TEXTURE, /* mesa */
+ ST_NEW_COMPUTE_PROGRAM | ST_NEW_SAMPLER_VIEWS, /* st */
+ },
+ update_compute_textures /* update */
+};
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 627b8cbd598..4fd2dfef8cc 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -33,6 +33,7 @@
#include "main/imports.h"
#include "main/image.h"
#include "main/bufferobj.h"
+#include "main/dlist.h"
#include "main/macros.h"
#include "main/pbo.h"
#include "program/program.h"
@@ -41,6 +42,7 @@
#include "st_context.h"
#include "st_atom.h"
#include "st_atom_constbuf.h"
+#include "st_draw.h"
#include "st_program.h"
#include "st_cb_bitmap.h"
#include "st_texture.h"
@@ -49,7 +51,6 @@
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_inlines.h"
-#include "util/u_draw_quad.h"
#include "util/u_simple_shaders.h"
#include "util/u_upload_mgr.h"
#include "program/prog_instruction.h"
@@ -183,7 +184,8 @@ make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height,
static void
setup_render_state(struct gl_context *ctx,
struct pipe_sampler_view *sv,
- const GLfloat *color)
+ const GLfloat *color,
+ bool atlas)
{
struct st_context *st = st_context(ctx);
struct cso_context *cso = st->cso_context;
@@ -194,7 +196,7 @@ setup_render_state(struct gl_context *ctx,
key.st = st->has_shareable_shaders ? NULL : st;
key.bitmap = GL_TRUE;
key.clamp_color = st->clamp_frag_color_in_shader &&
- st->ctx->Color._ClampFragmentColor;
+ ctx->Color._ClampFragmentColor;
fpv = st_get_fp_variant(st, st->fp, &key);
@@ -214,18 +216,15 @@ setup_render_state(struct gl_context *ctx,
COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave);
}
- cso_save_rasterizer(cso);
- cso_save_fragment_samplers(cso);
- cso_save_fragment_sampler_views(cso);
- cso_save_viewport(cso);
- cso_save_fragment_shader(cso);
- cso_save_stream_outputs(cso);
- cso_save_vertex_shader(cso);
- cso_save_tessctrl_shader(cso);
- cso_save_tesseval_shader(cso);
- cso_save_geometry_shader(cso);
- cso_save_vertex_elements(cso);
- cso_save_aux_vertex_buffer_slot(cso);
+ cso_save_state(cso, (CSO_BIT_RASTERIZER |
+ CSO_BIT_FRAGMENT_SAMPLERS |
+ CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
+ CSO_BIT_VIEWPORT |
+ CSO_BIT_STREAM_OUTPUTS |
+ CSO_BIT_VERTEX_ELEMENTS |
+ CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+ CSO_BITS_ALL_SHADERS));
+
/* rasterizer state: just scissor */
st->bitmap.rasterizer.scissor = ctx->Scissor.EnableFlags & 1;
@@ -251,7 +250,10 @@ setup_render_state(struct gl_context *ctx,
for (i = 0; i < st->state.num_samplers[PIPE_SHADER_FRAGMENT]; i++) {
samplers[i] = &st->state.samplers[PIPE_SHADER_FRAGMENT][i];
}
- samplers[fpv->bitmap_sampler] = &st->bitmap.sampler;
+ if (atlas)
+ samplers[fpv->bitmap_sampler] = &st->bitmap.atlas_sampler;
+ else
+ samplers[fpv->bitmap_sampler] = &st->bitmap.sampler;
cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, num,
(const struct pipe_sampler_state **) samplers);
}
@@ -268,21 +270,12 @@ setup_render_state(struct gl_context *ctx,
}
/* viewport state: viewport matching window dims */
- {
- const GLboolean invert = st->state.fb_orientation == Y_0_TOP;
- const GLfloat width = (GLfloat)st->state.framebuffer.width;
- const GLfloat height = (GLfloat)st->state.framebuffer.height;
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * width;
- vp.scale[1] = height * (invert ? -0.5f : 0.5f);
- vp.scale[2] = 0.5f;
- vp.translate[0] = 0.5f * width;
- vp.translate[1] = 0.5f * height;
- vp.translate[2] = 0.5f;
- cso_set_viewport(cso, &vp);
- }
+ cso_set_viewport_dims(cso, st->state.framebuffer.width,
+ st->state.framebuffer.height,
+ st->state.fb_orientation == Y_0_TOP);
+
+ cso_set_vertex_elements(cso, 3, st->util_velems);
- cso_set_vertex_elements(cso, 3, st->velems_util_draw);
cso_set_stream_outputs(st->cso_context, 0, NULL, NULL);
}
@@ -296,18 +289,7 @@ restore_render_state(struct gl_context *ctx)
struct st_context *st = st_context(ctx);
struct cso_context *cso = st->cso_context;
- cso_restore_rasterizer(cso);
- cso_restore_fragment_samplers(cso);
- cso_restore_fragment_sampler_views(cso);
- cso_restore_viewport(cso);
- cso_restore_fragment_shader(cso);
- cso_restore_vertex_shader(cso);
- cso_restore_tessctrl_shader(cso);
- cso_restore_tesseval_shader(cso);
- cso_restore_geometry_shader(cso);
- cso_restore_vertex_elements(cso);
- cso_restore_aux_vertex_buffer_slot(cso);
- cso_restore_stream_outputs(cso);
+ cso_restore_state(cso);
}
@@ -322,7 +304,6 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
{
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
- struct pipe_resource *vbuf = NULL;
const float fb_width = (float) st->state.framebuffer.width;
const float fb_height = (float) st->state.framebuffer.height;
const float x0 = (float) x;
@@ -335,8 +316,6 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
const float clip_y0 = y0 / fb_height * 2.0f - 1.0f;
const float clip_x1 = x1 / fb_width * 2.0f - 1.0f;
const float clip_y1 = y1 / fb_height * 2.0f - 1.0f;
- float (*vertices)[3][4]; /**< vertex pos + color + texcoord */
- unsigned offset, i;
/* limit checks */
{
@@ -349,7 +328,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
assert(height <= (GLsizei) maxSize);
}
- setup_render_state(ctx, sv, color);
+ setup_render_state(ctx, sv, color, false);
/* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
z = z * 2.0f - 1.0f;
@@ -360,62 +339,13 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
tBot = (float) height;
}
- u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), 4,
- &offset, &vbuf, (void **) &vertices);
- if (!vbuf) {
+ if (!st_draw_quad(st, clip_x0, clip_y0, clip_x1, clip_y1, z,
+ sLeft, tBot, sRight, tTop, color, 0)) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glBitmap");
- restore_render_state(ctx);
- return;
}
- /* Positions are in clip coords since we need to do clipping in case
- * the bitmap quad goes beyond the window bounds.
- */
- vertices[0][0][0] = clip_x0;
- vertices[0][0][1] = clip_y0;
- vertices[0][2][0] = sLeft;
- vertices[0][2][1] = tTop;
-
- vertices[1][0][0] = clip_x1;
- vertices[1][0][1] = clip_y0;
- vertices[1][2][0] = sRight;
- vertices[1][2][1] = tTop;
-
- vertices[2][0][0] = clip_x1;
- vertices[2][0][1] = clip_y1;
- vertices[2][2][0] = sRight;
- vertices[2][2][1] = tBot;
-
- vertices[3][0][0] = clip_x0;
- vertices[3][0][1] = clip_y1;
- vertices[3][2][0] = sLeft;
- vertices[3][2][1] = tBot;
-
- /* same for all verts: */
- for (i = 0; i < 4; i++) {
- vertices[i][0][2] = z;
- vertices[i][0][3] = 1.0f;
- vertices[i][1][0] = color[0];
- vertices[i][1][1] = color[1];
- vertices[i][1][2] = color[2];
- vertices[i][1][3] = color[3];
- vertices[i][2][2] = 0.0; /*R*/
- vertices[i][2][3] = 1.0; /*Q*/
- }
-
- u_upload_unmap(st->uploader);
-
- util_draw_vertex_buffer(pipe, st->cso_context, vbuf,
- cso_get_aux_vertex_buffer_slot(st->cso_context),
- offset,
- PIPE_PRIM_TRIANGLE_FAN,
- 4, /* verts */
- 3); /* attribs/vert */
-
restore_render_state(ctx);
- pipe_resource_reference(&vbuf, NULL);
-
/* We uploaded modified constants, need to invalidate them. */
st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS;
}
@@ -555,7 +485,7 @@ accum_bitmap(struct gl_context *ctx,
struct st_context *st = ctx->st;
struct bitmap_cache *cache = st->bitmap.cache;
int px = -999, py = -999;
- const GLfloat z = st->ctx->Current.RasterPos[2];
+ const GLfloat z = ctx->Current.RasterPos[2];
if (width > BITMAP_CACHE_WIDTH ||
height > BITMAP_CACHE_HEIGHT)
@@ -566,7 +496,7 @@ accum_bitmap(struct gl_context *ctx,
py = y - cache->ypos;
if (px < 0 || px + width > BITMAP_CACHE_WIDTH ||
py < 0 || py + height > BITMAP_CACHE_HEIGHT ||
- !TEST_EQ_4V(st->ctx->Current.RasterColor, cache->color) ||
+ !TEST_EQ_4V(ctx->Current.RasterColor, cache->color) ||
((fabs(z - cache->zpos) > Z_EPSILON))) {
/* This bitmap would extend beyond cache bounds, or the bitmap
* color is changing
@@ -584,7 +514,7 @@ accum_bitmap(struct gl_context *ctx,
cache->ypos = y - py;
cache->zpos = z;
cache->empty = GL_FALSE;
- COPY_4FV(cache->color, st->ctx->Current.RasterColor);
+ COPY_4FV(cache->color, ctx->Current.RasterColor);
}
assert(px != -999);
@@ -645,6 +575,9 @@ init_bitmap_state(struct st_context *st)
st->bitmap.sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
st->bitmap.sampler.normalized_coords = st->internal_target == PIPE_TEXTURE_2D;
+ st->bitmap.atlas_sampler = st->bitmap.sampler;
+ st->bitmap.atlas_sampler.normalized_coords = 0;
+
/* init baseline rasterizer state once */
memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer));
st->bitmap.rasterizer.half_pixel_center = 1;
@@ -713,7 +646,7 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
* explicitly uploaded in the draw_bitmap_quad() function.
*/
if ((st->dirty.mesa & ~_NEW_PROGRAM_CONSTANTS) || st->dirty.st) {
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
}
if (UseBitmapCache && accum_bitmap(ctx, x, y, width, height, unpack, bitmap))
@@ -728,8 +661,7 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
if (sv) {
draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2],
- width, height, sv,
- st->ctx->Current.RasterColor);
+ width, height, sv, ctx->Current.RasterColor);
pipe_sampler_view_reference(&sv, NULL);
}
@@ -740,11 +672,139 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
}
+/**
+ * Called via ctx->Driver.DrawAtlasBitmap()
+ */
+static void
+st_DrawAtlasBitmaps(struct gl_context *ctx,
+ const struct gl_bitmap_atlas *atlas,
+ GLuint count, const GLubyte *ids)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ struct st_texture_object *stObj = st_texture_object(atlas->texObj);
+ struct pipe_sampler_view *sv;
+ /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
+ const float z = ctx->Current.RasterPos[2] * 2.0f - 1.0f;
+ const float *color = ctx->Current.RasterColor;
+ const float clip_x_scale = 2.0f / st->state.framebuffer.width;
+ const float clip_y_scale = 2.0f / st->state.framebuffer.height;
+ const unsigned num_verts = count * 4;
+ const unsigned num_vert_bytes = num_verts * sizeof(struct st_util_vertex);
+ struct st_util_vertex *verts;
+ struct pipe_vertex_buffer vb = {0};
+ unsigned i;
+
+ if (!st->bitmap.cache) {
+ init_bitmap_state(st);
+ }
+
+ st_flush_bitmap_cache(st);
+
+ st_validate_state(st, ST_PIPELINE_RENDER);
+
+ sv = st_create_texture_sampler_view(pipe, stObj->pt);
+
+ setup_render_state(ctx, sv, color, true);
+
+ vb.stride = sizeof(struct st_util_vertex);
+
+ u_upload_alloc(st->uploader, 0, num_vert_bytes, 4,
+ &vb.buffer_offset, &vb.buffer, (void **) &verts);
+
+ /* build quads vertex data */
+ for (i = 0; i < count; i++) {
+ const GLfloat epsilon = 0.0001F;
+ const struct gl_bitmap_glyph *g = &atlas->glyphs[ids[i]];
+ const float xmove = g->xmove, ymove = g->ymove;
+ const float xorig = g->xorig, yorig = g->yorig;
+ const float s0 = g->x, t0 = g->y;
+ const float s1 = s0 + g->w, t1 = t0 + g->h;
+ const float x0 = IFLOOR(ctx->Current.RasterPos[0] - xorig + epsilon);
+ const float y0 = IFLOOR(ctx->Current.RasterPos[1] - yorig + epsilon);
+ const float x1 = x0 + g->w, y1 = y0 + g->h;
+ const float clip_x0 = x0 * clip_x_scale - 1.0f;
+ const float clip_y0 = y0 * clip_y_scale - 1.0f;
+ const float clip_x1 = x1 * clip_x_scale - 1.0f;
+ const float clip_y1 = y1 * clip_y_scale - 1.0f;
+
+ /* lower-left corner */
+ verts->x = clip_x0;
+ verts->y = clip_y0;
+ verts->z = z;
+ verts->r = color[0];
+ verts->g = color[1];
+ verts->b = color[2];
+ verts->a = color[3];
+ verts->s = s0;
+ verts->t = t0;
+ verts++;
+
+ /* lower-right corner */
+ verts->x = clip_x1;
+ verts->y = clip_y0;
+ verts->z = z;
+ verts->r = color[0];
+ verts->g = color[1];
+ verts->b = color[2];
+ verts->a = color[3];
+ verts->s = s1;
+ verts->t = t0;
+ verts++;
+
+ /* upper-right corner */
+ verts->x = clip_x1;
+ verts->y = clip_y1;
+ verts->z = z;
+ verts->r = color[0];
+ verts->g = color[1];
+ verts->b = color[2];
+ verts->a = color[3];
+ verts->s = s1;
+ verts->t = t1;
+ verts++;
+
+ /* upper-left corner */
+ verts->x = clip_x0;
+ verts->y = clip_y1;
+ verts->z = z;
+ verts->r = color[0];
+ verts->g = color[1];
+ verts->b = color[2];
+ verts->a = color[3];
+ verts->s = s0;
+ verts->t = t1;
+ verts++;
+
+ /* Update the raster position */
+ ctx->Current.RasterPos[0] += xmove;
+ ctx->Current.RasterPos[1] += ymove;
+ }
+
+ u_upload_unmap(st->uploader);
+
+ cso_set_vertex_buffers(st->cso_context,
+ cso_get_aux_vertex_buffer_slot(st->cso_context),
+ 1, &vb);
+
+ cso_draw_arrays(st->cso_context, PIPE_PRIM_QUADS, 0, num_verts);
+
+ restore_render_state(ctx);
+
+ pipe_resource_reference(&vb.buffer, NULL);
+
+ /* We uploaded modified constants, need to invalidate them. */
+ st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS;
+}
+
+
+
/** Per-context init */
void
st_init_bitmap_functions(struct dd_function_table *functions)
{
functions->Bitmap = st_Bitmap;
+ functions->DrawAtlasBitmaps = st_DrawAtlasBitmaps;
}
diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c
index 7b6d10e76b1..55801469f23 100644
--- a/src/mesa/state_tracker/st_cb_clear.c
+++ b/src/mesa/state_tracker/st_cb_clear.c
@@ -44,6 +44,7 @@
#include "st_cb_bitmap.h"
#include "st_cb_clear.h"
#include "st_cb_fbo.h"
+#include "st_draw.h"
#include "st_format.h"
#include "st_program.h"
@@ -55,8 +56,6 @@
#include "util/u_framebuffer.h"
#include "util/u_inlines.h"
#include "util/u_simple_shaders.h"
-#include "util/u_draw_quad.h"
-#include "util/u_upload_mgr.h"
#include "cso_cache/cso_context.h"
@@ -169,67 +168,6 @@ set_vertex_shader_layered(struct st_context *st)
/**
- * Draw a screen-aligned quadrilateral.
- * Coords are clip coords with y=0=bottom.
- */
-static void
-draw_quad(struct st_context *st,
- float x0, float y0, float x1, float y1, GLfloat z,
- unsigned num_instances,
- const union pipe_color_union *color)
-{
- struct cso_context *cso = st->cso_context;
- struct pipe_vertex_buffer vb = {0};
- GLuint i;
- float (*vertices)[2][4]; /**< vertex pos + color */
-
- vb.stride = 8 * sizeof(float);
-
- u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), 4,
- &vb.buffer_offset, &vb.buffer,
- (void **) &vertices);
- if (!vb.buffer) {
- return;
- }
-
- /* Convert Z from [0,1] to [-1,1] range */
- z = z * 2.0f - 1.0f;
-
- /* positions */
- vertices[0][0][0] = x0;
- vertices[0][0][1] = y0;
-
- vertices[1][0][0] = x1;
- vertices[1][0][1] = y0;
-
- vertices[2][0][0] = x1;
- vertices[2][0][1] = y1;
-
- vertices[3][0][0] = x0;
- vertices[3][0][1] = y1;
-
- /* same for all verts: */
- for (i = 0; i < 4; i++) {
- vertices[i][0][2] = z;
- vertices[i][0][3] = 1.0;
- vertices[i][1][0] = color->f[0];
- vertices[i][1][1] = color->f[1];
- vertices[i][1][2] = color->f[2];
- vertices[i][1][3] = color->f[3];
- }
-
- u_upload_unmap(st->uploader);
-
- /* draw */
- cso_set_vertex_buffers(cso, cso_get_aux_vertex_buffer_slot(cso), 1, &vb);
- cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_FAN, 0, 4,
- 0, num_instances);
- pipe_resource_reference(&vb.buffer, NULL);
-}
-
-
-
-/**
* Do glClear by drawing a quadrilateral.
* The vertices of the quad will be computed from the
* ctx->DrawBuffer->_X/Ymin/max fields.
@@ -238,6 +176,7 @@ static void
clear_with_quad(struct gl_context *ctx, unsigned clear_buffers)
{
struct st_context *st = st_context(ctx);
+ struct cso_context *cso = st->cso_context;
const struct gl_framebuffer *fb = ctx->DrawBuffer;
const GLfloat fb_width = (GLfloat) fb->Width;
const GLfloat fb_height = (GLfloat) fb->Height;
@@ -257,21 +196,17 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers)
x1, y1);
*/
- cso_save_blend(st->cso_context);
- cso_save_stencil_ref(st->cso_context);
- cso_save_depth_stencil_alpha(st->cso_context);
- cso_save_rasterizer(st->cso_context);
- cso_save_sample_mask(st->cso_context);
- cso_save_min_samples(st->cso_context);
- cso_save_viewport(st->cso_context);
- cso_save_fragment_shader(st->cso_context);
- cso_save_stream_outputs(st->cso_context);
- cso_save_vertex_shader(st->cso_context);
- cso_save_tessctrl_shader(st->cso_context);
- cso_save_tesseval_shader(st->cso_context);
- cso_save_geometry_shader(st->cso_context);
- cso_save_vertex_elements(st->cso_context);
- cso_save_aux_vertex_buffer_slot(st->cso_context);
+ cso_save_state(cso, (CSO_BIT_BLEND |
+ CSO_BIT_STENCIL_REF |
+ CSO_BIT_DEPTH_STENCIL_ALPHA |
+ CSO_BIT_RASTERIZER |
+ CSO_BIT_SAMPLE_MASK |
+ CSO_BIT_MIN_SAMPLES |
+ CSO_BIT_VIEWPORT |
+ CSO_BIT_STREAM_OUTPUTS |
+ CSO_BIT_VERTEX_ELEMENTS |
+ CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+ CSO_BITS_ALL_SHADERS));
/* blend state: RGBA masking */
{
@@ -298,10 +233,10 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers)
blend.rt[i].colormask |= PIPE_MASK_A;
}
- if (st->ctx->Color.DitherFlag)
+ if (ctx->Color.DitherFlag)
blend.dither = 1;
}
- cso_set_blend(st->cso_context, &blend);
+ cso_set_blend(cso, &blend);
}
/* depth_stencil state: always pass/set to ref value */
@@ -325,64 +260,49 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers)
depth_stencil.stencil[0].valuemask = 0xff;
depth_stencil.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff;
stencil_ref.ref_value[0] = ctx->Stencil.Clear;
- cso_set_stencil_ref(st->cso_context, &stencil_ref);
+ cso_set_stencil_ref(cso, &stencil_ref);
}
- cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil);
+ cso_set_depth_stencil_alpha(cso, &depth_stencil);
}
- cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw);
- cso_set_stream_outputs(st->cso_context, 0, NULL, NULL);
- cso_set_sample_mask(st->cso_context, ~0);
- cso_set_min_samples(st->cso_context, 1);
- cso_set_rasterizer(st->cso_context, &st->clear.raster);
+ cso_set_vertex_elements(cso, 2, st->util_velems);
+ cso_set_stream_outputs(cso, 0, NULL, NULL);
+ cso_set_sample_mask(cso, ~0);
+ cso_set_min_samples(cso, 1);
+ cso_set_rasterizer(cso, &st->clear.raster);
/* viewport state: viewport matching window dims */
- {
- const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * fb_width;
- vp.scale[1] = fb_height * (invert ? -0.5f : 0.5f);
- vp.scale[2] = 0.5f;
- vp.translate[0] = 0.5f * fb_width;
- vp.translate[1] = 0.5f * fb_height;
- vp.translate[2] = 0.5f;
- cso_set_viewport(st->cso_context, &vp);
- }
+ cso_set_viewport_dims(st->cso_context, fb_width, fb_height,
+ st_fb_orientation(fb) == Y_0_TOP);
set_fragment_shader(st);
- cso_set_tessctrl_shader_handle(st->cso_context, NULL);
- cso_set_tesseval_shader_handle(st->cso_context, NULL);
+ cso_set_tessctrl_shader_handle(cso, NULL);
+ cso_set_tesseval_shader_handle(cso, NULL);
if (num_layers > 1)
set_vertex_shader_layered(st);
else
set_vertex_shader(st);
- /* We can't translate the clear color to the colorbuffer format,
+ /* draw quad matching scissor rect.
+ *
+ * Note: if we're only clearing depth/stencil we still setup vertices
+ * with color, but they'll be ignored.
+ *
+ * We can't translate the clear color to the colorbuffer format,
* because different colorbuffers may have different formats.
*/
-
- /* draw quad matching scissor rect */
- draw_quad(st, x0, y0, x1, y1, (GLfloat) ctx->Depth.Clear, num_layers,
- (union pipe_color_union*)&ctx->Color.ClearColor);
+ if (!st_draw_quad(st, x0, y0, x1, y1,
+ ctx->Depth.Clear * 2.0f - 1.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f,
+ (const float *) &ctx->Color.ClearColor.f,
+ num_layers)) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glClear");
+ }
/* Restore pipe state */
- cso_restore_blend(st->cso_context);
- cso_restore_stencil_ref(st->cso_context);
- cso_restore_depth_stencil_alpha(st->cso_context);
- cso_restore_rasterizer(st->cso_context);
- cso_restore_sample_mask(st->cso_context);
- cso_restore_min_samples(st->cso_context);
- cso_restore_viewport(st->cso_context);
- cso_restore_fragment_shader(st->cso_context);
- cso_restore_vertex_shader(st->cso_context);
- cso_restore_tessctrl_shader(st->cso_context);
- cso_restore_tesseval_shader(st->cso_context);
- cso_restore_geometry_shader(st->cso_context);
- cso_restore_vertex_elements(st->cso_context);
- cso_restore_aux_vertex_buffer_slot(st->cso_context);
- cso_restore_stream_outputs(st->cso_context);
+ cso_restore_state(cso);
}
@@ -470,7 +390,7 @@ st_Clear(struct gl_context *ctx, GLbitfield mask)
st_flush_bitmap_cache(st);
/* This makes sure the pipe has the latest scissor, etc values */
- st_validate_state( st );
+ st_validate_state( st, ST_PIPELINE_RENDER );
if (mask & BUFFER_BITS_COLOR) {
for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
diff --git a/src/mesa/state_tracker/st_cb_compute.c b/src/mesa/state_tracker/st_cb_compute.c
new file mode 100644
index 00000000000..364159d62d8
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_compute.c
@@ -0,0 +1,85 @@
+/**************************************************************************
+ *
+ * Copyright 2016 Samuel Pitoiset
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "main/state.h"
+#include "st_atom.h"
+#include "st_context.h"
+#include "st_cb_bufferobjects.h"
+#include "st_cb_compute.h"
+
+#include "pipe/p_context.h"
+
+static void st_dispatch_compute_common(struct gl_context *ctx,
+ const GLuint *num_groups,
+ struct pipe_resource *indirect,
+ GLintptr indirect_offset)
+{
+ struct gl_shader_program *prog =
+ ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_grid_info info = { 0 };
+
+ if (ctx->NewState)
+ _mesa_update_state(ctx);
+
+ if (st->dirty_cp.st || ctx->NewDriverState)
+ st_validate_state(st, ST_PIPELINE_COMPUTE);
+
+ for (unsigned i = 0; i < 3; i++) {
+ info.block[i] = prog->Comp.LocalSize[i];
+ info.grid[i] = num_groups ? num_groups[i] : 0;
+ }
+
+ if (indirect) {
+ info.indirect = indirect;
+ info.indirect_offset = indirect_offset;
+ }
+
+ pipe->launch_grid(pipe, &info);
+}
+
+static void st_dispatch_compute(struct gl_context *ctx,
+ const GLuint *num_groups)
+{
+ st_dispatch_compute_common(ctx, num_groups, NULL, 0);
+}
+
+static void st_dispatch_compute_indirect(struct gl_context *ctx,
+ GLintptr indirect_offset)
+{
+ struct gl_buffer_object *indirect_buffer = ctx->DispatchIndirectBuffer;
+ struct pipe_resource *indirect = st_buffer_object(indirect_buffer)->buffer;
+
+ st_dispatch_compute_common(ctx, NULL, indirect, indirect_offset);
+}
+
+void st_init_compute_functions(struct dd_function_table *functions)
+{
+ functions->DispatchCompute = st_dispatch_compute;
+ functions->DispatchComputeIndirect = st_dispatch_compute_indirect;
+}
diff --git a/src/mesa/state_tracker/st_cb_compute.h b/src/mesa/state_tracker/st_cb_compute.h
new file mode 100644
index 00000000000..78ec756a69e
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_compute.h
@@ -0,0 +1,38 @@
+/**************************************************************************
+ *
+ * Copyright 2016 Samuel Pitoiset
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef ST_CB_COMPUTE_H
+#define ST_CB_COMPUTE_H
+
+#include "main/compiler.h"
+
+struct dd_function_table;
+
+extern void
+st_init_compute_functions(struct dd_function_table *functions);
+
+#endif /* ST_CB_COMPUTE_H */
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index fd58886a782..51d4ae51918 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -35,6 +35,7 @@
#include "main/bufferobj.h"
#include "main/blit.h"
#include "main/format_pack.h"
+#include "main/framebuffer.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/pack.h"
@@ -56,6 +57,7 @@
#include "st_cb_fbo.h"
#include "st_context.h"
#include "st_debug.h"
+#include "st_draw.h"
#include "st_format.h"
#include "st_program.h"
#include "st_texture.h"
@@ -63,16 +65,45 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "tgsi/tgsi_ureg.h"
-#include "util/u_draw_quad.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_tile.h"
-#include "util/u_upload_mgr.h"
#include "cso_cache/cso_context.h"
/**
+ * We have a simple glDrawPixels cache to try to optimize the case where the
+ * same image is drawn over and over again. It basically works as follows:
+ *
+ * 1. After we construct a texture map with the image and draw it, we do
+ * not discard the texture. We keep it around, plus we note the
+ * glDrawPixels width, height, format, etc. parameters and keep a copy
+ * of the image in a malloc'd buffer.
+ *
+ * 2. On the next glDrawPixels we check if the parameters match the previous
+ * call. If those match, we check if the image matches the previous image
+ * via a memcmp() call. If everything matches, we re-use the previous
+ * texture, thereby avoiding the cost creating a new texture and copying
+ * the image to it.
+ *
+ * The effectiveness of this cache depends upon:
+ * 1. If the memcmp() finds a difference, it happens relatively quickly.
+ Hopefully, not just the last pixels differ!
+ * 2. If the memcmp() finds no difference, doing that check is faster than
+ * creating and loading a texture.
+ *
+ * Notes:
+ * 1. We don't support any pixel unpacking parameters.
+ * 2. We don't try to cache images in Pixel Buffer Objects.
+ * 3. Instead of saving the whole image, perhaps some sort of reliable
+ * checksum function could be used instead.
+ */
+#define USE_DRAWPIXELS_CACHE 1
+
+
+
+/**
* Create fragment program that does a TEX() instruction to get a Z and/or
* stencil value value, then writes to FRAG_RESULT_DEPTH/FRAG_RESULT_STENCIL.
* Used for glDrawPixels(GL_DEPTH_COMPONENT / GL_STENCIL_INDEX).
@@ -162,22 +193,22 @@ make_passthrough_vertex_shader(struct st_context *st,
return NULL;
/* MOV result.pos, vertex.pos; */
- ureg_MOV(ureg,
+ ureg_MOV(ureg,
ureg_DECL_output( ureg, TGSI_SEMANTIC_POSITION, 0 ),
ureg_DECL_vs_input( ureg, 0 ));
-
- /* MOV result.texcoord0, vertex.attr[1]; */
- ureg_MOV(ureg,
- ureg_DECL_output( ureg, texcoord_semantic, 0 ),
- ureg_DECL_vs_input( ureg, 1 ));
-
+
if (passColor) {
- /* MOV result.color0, vertex.attr[2]; */
- ureg_MOV(ureg,
+ /* MOV result.color0, vertex.attr[1]; */
+ ureg_MOV(ureg,
ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 ),
- ureg_DECL_vs_input( ureg, 2 ));
+ ureg_DECL_vs_input( ureg, 1 ));
}
+ /* MOV result.texcoord0, vertex.attr[2]; */
+ ureg_MOV(ureg,
+ ureg_DECL_output( ureg, texcoord_semantic, 0 ),
+ ureg_DECL_vs_input( ureg, 2 ));
+
ureg_END( ureg );
st->drawpix.vert_shaders[passColor] =
@@ -347,6 +378,39 @@ make_texture(struct st_context *st,
enum pipe_format pipeFormat;
GLenum baseInternalFormat;
+#if USE_DRAWPIXELS_CACHE
+ const GLint bpp = _mesa_bytes_per_pixel(format, type);
+
+ /* Check if the glDrawPixels() parameters and state matches the cache */
+ if (width == st->drawpix_cache.width &&
+ height == st->drawpix_cache.height &&
+ format == st->drawpix_cache.format &&
+ type == st->drawpix_cache.type &&
+ pixels == st->drawpix_cache.user_pointer &&
+ !_mesa_is_bufferobj(unpack->BufferObj) &&
+ (unpack->RowLength == 0 || unpack->RowLength == width) &&
+ unpack->SkipPixels == 0 &&
+ unpack->SkipRows == 0 &&
+ unpack->SwapBytes == GL_FALSE &&
+ st->drawpix_cache.image) {
+ /* check if the pixel data is the same */
+ if (memcmp(pixels, st->drawpix_cache.image, width * height * bpp) == 0) {
+ /* OK, re-use the cached texture */
+ return st->drawpix_cache.texture;
+ }
+ }
+
+ /* discard the cached image and texture (if there is one) */
+ st->drawpix_cache.width = 0;
+ st->drawpix_cache.height = 0;
+ st->drawpix_cache.user_pointer = NULL;
+ if (st->drawpix_cache.image) {
+ free(st->drawpix_cache.image);
+ st->drawpix_cache.image = NULL;
+ }
+ pipe_resource_reference(&st->drawpix_cache.texture, NULL);
+#endif
+
/* Choose a pixel format for the temp texture which will hold the
* image to draw.
*/
@@ -437,107 +501,29 @@ make_texture(struct st_context *st,
_mesa_unmap_pbo_source(ctx, unpack);
- return pt;
-}
-
-
-/**
- * Draw quad with texcoords and optional color.
- * Coords are gallium window coords with y=0=top.
- * \param color may be null
- * \param invertTex if true, flip texcoords vertically
- */
-static void
-draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z,
- GLfloat x1, GLfloat y1, const GLfloat *color,
- GLboolean invertTex, GLfloat maxXcoord, GLfloat maxYcoord)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- GLfloat (*verts)[3][4]; /* four verts, three attribs, XYZW */
- struct pipe_resource *buf = NULL;
- unsigned offset;
-
- u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), 4, &offset,
- &buf, (void **) &verts);
- if (!buf) {
- return;
- }
-
- /* setup vertex data */
- {
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
- const GLfloat fb_width = (GLfloat) fb->Width;
- const GLfloat fb_height = (GLfloat) fb->Height;
- const GLfloat clip_x0 = x0 / fb_width * 2.0f - 1.0f;
- const GLfloat clip_y0 = y0 / fb_height * 2.0f - 1.0f;
- const GLfloat clip_x1 = x1 / fb_width * 2.0f - 1.0f;
- const GLfloat clip_y1 = y1 / fb_height * 2.0f - 1.0f;
- const GLfloat sLeft = 0.0f, sRight = maxXcoord;
- const GLfloat tTop = invertTex ? maxYcoord : 0.0f;
- const GLfloat tBot = invertTex ? 0.0f : maxYcoord;
- GLuint i;
-
- /* upper-left */
- verts[0][0][0] = clip_x0; /* v[0].attr[0].x */
- verts[0][0][1] = clip_y0; /* v[0].attr[0].y */
-
- /* upper-right */
- verts[1][0][0] = clip_x1;
- verts[1][0][1] = clip_y0;
-
- /* lower-right */
- verts[2][0][0] = clip_x1;
- verts[2][0][1] = clip_y1;
-
- /* lower-left */
- verts[3][0][0] = clip_x0;
- verts[3][0][1] = clip_y1;
-
- verts[0][1][0] = sLeft; /* v[0].attr[1].S */
- verts[0][1][1] = tTop; /* v[0].attr[1].T */
- verts[1][1][0] = sRight;
- verts[1][1][1] = tTop;
- verts[2][1][0] = sRight;
- verts[2][1][1] = tBot;
- verts[3][1][0] = sLeft;
- verts[3][1][1] = tBot;
-
- /* same for all verts: */
- if (color) {
- for (i = 0; i < 4; i++) {
- verts[i][0][2] = z; /* v[i].attr[0].z */
- verts[i][0][3] = 1.0f; /* v[i].attr[0].w */
- verts[i][2][0] = color[0]; /* v[i].attr[2].r */
- verts[i][2][1] = color[1]; /* v[i].attr[2].g */
- verts[i][2][2] = color[2]; /* v[i].attr[2].b */
- verts[i][2][3] = color[3]; /* v[i].attr[2].a */
- verts[i][1][2] = 0.0f; /* v[i].attr[1].R */
- verts[i][1][3] = 1.0f; /* v[i].attr[1].Q */
- }
- }
- else {
- for (i = 0; i < 4; i++) {
- verts[i][0][2] = z; /*Z*/
- verts[i][0][3] = 1.0f; /*W*/
- verts[i][1][2] = 0.0f; /*R*/
- verts[i][1][3] = 1.0f; /*Q*/
- }
+#if USE_DRAWPIXELS_CACHE
+ /* Save the glDrawPixels parameter and image in the cache */
+ if ((unpack->RowLength == 0 || unpack->RowLength == width) &&
+ unpack->SkipPixels == 0 &&
+ unpack->SkipRows == 0) {
+ st->drawpix_cache.width = width;
+ st->drawpix_cache.height = height;
+ st->drawpix_cache.format = format;
+ st->drawpix_cache.type = type;
+ st->drawpix_cache.user_pointer = pixels;
+ assert(!st->drawpix_cache.image);
+ st->drawpix_cache.image = malloc(width * height * bpp);
+ if (st->drawpix_cache.image) {
+ memcpy(st->drawpix_cache.image, pixels, width * height * bpp);
}
+ st->drawpix_cache.texture = pt;
}
+#endif
- u_upload_unmap(st->uploader);
- util_draw_vertex_buffer(pipe, st->cso_context, buf,
- cso_get_aux_vertex_buffer_slot(st->cso_context),
- offset,
- PIPE_PRIM_QUADS,
- 4, /* verts */
- 3); /* attribs/vert */
- pipe_resource_reference(&buf, NULL);
+ return pt;
}
-
static void
draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
GLsizei width, GLsizei height,
@@ -554,9 +540,12 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
struct cso_context *cso = st->cso_context;
+ const unsigned fb_width = _mesa_geometric_width(ctx->DrawBuffer);
+ const unsigned fb_height = _mesa_geometric_height(ctx->DrawBuffer);
GLfloat x0, y0, x1, y1;
GLsizei maxSize;
boolean normalized = sv[0]->texture->target == PIPE_TEXTURE_2D;
+ unsigned cso_state_mask;
assert(sv[0]->texture->target == st->internal_target);
@@ -569,22 +558,19 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
assert(width <= maxSize);
assert(height <= maxSize);
- cso_save_rasterizer(cso);
- cso_save_viewport(cso);
- cso_save_fragment_samplers(cso);
- cso_save_fragment_sampler_views(cso);
- cso_save_fragment_shader(cso);
- cso_save_stream_outputs(cso);
- cso_save_vertex_shader(cso);
- cso_save_tessctrl_shader(cso);
- cso_save_tesseval_shader(cso);
- cso_save_geometry_shader(cso);
- cso_save_vertex_elements(cso);
- cso_save_aux_vertex_buffer_slot(cso);
+ cso_state_mask = (CSO_BIT_RASTERIZER |
+ CSO_BIT_VIEWPORT |
+ CSO_BIT_FRAGMENT_SAMPLERS |
+ CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
+ CSO_BIT_STREAM_OUTPUTS |
+ CSO_BIT_VERTEX_ELEMENTS |
+ CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+ CSO_BITS_ALL_SHADERS);
if (write_stencil) {
- cso_save_depth_stencil_alpha(cso);
- cso_save_blend(cso);
+ cso_state_mask |= (CSO_BIT_DEPTH_STENCIL_ALPHA |
+ CSO_BIT_BLEND);
}
+ cso_save_state(cso, cso_state_mask);
/* rasterizer state: just scissor */
{
@@ -694,28 +680,17 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
}
/* viewport state: viewport matching window dims */
- {
- const float w = (float) ctx->DrawBuffer->Width;
- const float h = (float) ctx->DrawBuffer->Height;
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * w;
- vp.scale[1] = -0.5f * h;
- vp.scale[2] = 0.5f;
- vp.translate[0] = 0.5f * w;
- vp.translate[1] = 0.5f * h;
- vp.translate[2] = 0.5f;
- cso_set_viewport(cso, &vp);
- }
+ cso_set_viewport_dims(cso, fb_width, fb_height, TRUE);
- cso_set_vertex_elements(cso, 3, st->velems_util_draw);
- cso_set_stream_outputs(st->cso_context, 0, NULL, NULL);
+ cso_set_vertex_elements(cso, 3, st->util_velems);
+ cso_set_stream_outputs(cso, 0, NULL, NULL);
/* Compute Gallium window coords (y=0=top) with pixel zoom.
* Recall that these coords are transformed by the current
* vertex shader and viewport transformation.
*/
if (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM) {
- y = ctx->DrawBuffer->Height - (int) (y + height * ctx->Pixel.ZoomY);
+ y = fb_height - (int) (y + height * ctx->Pixel.ZoomY);
invertTex = !invertTex;
}
@@ -727,27 +702,27 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
/* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
z = z * 2.0f - 1.0f;
- draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex,
- normalized ? ((GLfloat) width / sv[0]->texture->width0) : (GLfloat)width,
- normalized ? ((GLfloat) height / sv[0]->texture->height0) : (GLfloat)height);
+ {
+ const float clip_x0 = x0 / (float) fb_width * 2.0f - 1.0f;
+ const float clip_y0 = y0 / (float) fb_height * 2.0f - 1.0f;
+ const float clip_x1 = x1 / (float) fb_width * 2.0f - 1.0f;
+ const float clip_y1 = y1 / (float) fb_height * 2.0f - 1.0f;
+ const float maxXcoord = normalized ?
+ ((float) width / sv[0]->texture->width0) : (float) width;
+ const float maxYcoord = normalized
+ ? ((float) height / sv[0]->texture->height0) : (float) height;
+ const float sLeft = 0.0f, sRight = maxXcoord;
+ const float tTop = invertTex ? maxYcoord : 0.0f;
+ const float tBot = invertTex ? 0.0f : maxYcoord;
+
+ if (!st_draw_quad(st, clip_x0, clip_y0, clip_x1, clip_y1, z,
+ sLeft, tBot, sRight, tTop, color, 0)) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
+ }
+ }
/* restore state */
- cso_restore_rasterizer(cso);
- cso_restore_viewport(cso);
- cso_restore_fragment_samplers(cso);
- cso_restore_fragment_sampler_views(cso);
- cso_restore_fragment_shader(cso);
- cso_restore_vertex_shader(cso);
- cso_restore_tessctrl_shader(cso);
- cso_restore_tesseval_shader(cso);
- cso_restore_geometry_shader(cso);
- cso_restore_vertex_elements(cso);
- cso_restore_aux_vertex_buffer_slot(cso);
- cso_restore_stream_outputs(cso);
- if (write_stencil) {
- cso_restore_depth_stencil_alpha(cso);
- cso_restore_blend(cso);
- }
+ cso_restore_state(cso);
}
@@ -956,7 +931,7 @@ get_color_fp_variant(struct st_context *st)
ctx->Pixel.AlphaScale != 1.0);
key.pixelMaps = ctx->Pixel.MapColorFlag;
key.clamp_color = st->clamp_frag_color_in_shader &&
- st->ctx->Color._ClampFragmentColor;
+ ctx->Color._ClampFragmentColor;
fpv = st_get_fp_variant(st, st->fp, &key);
@@ -1060,7 +1035,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
{
void *driver_vp, *driver_fp;
struct st_context *st = st_context(ctx);
- const GLfloat *color;
struct pipe_context *pipe = st->pipe;
GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE;
struct pipe_sampler_view *sv[2] = { NULL };
@@ -1074,7 +1048,7 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
st_flush_bitmap_cache(st);
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
/* Limit the size of the glDrawPixels to the max texture size.
* Strictly speaking, that's not correct but since we don't handle
@@ -1106,7 +1080,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
driver_fp = get_drawpix_z_stencil_program(st, write_depth,
write_stencil);
driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
- color = ctx->Current.RasterColor;
}
else {
fpv = get_color_fp_variant(st);
@@ -1114,7 +1087,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
driver_fp = fpv->driver_shader;
driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
- color = NULL;
if (ctx->Pixel.MapColorFlag) {
pipe_sampler_view_reference(&sv[1],
st->pixel_xfer.pixelmap_sampler_view);
@@ -1172,12 +1144,15 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
num_sampler_view,
driver_vp,
driver_fp, fpv,
- color, GL_FALSE, write_depth, write_stencil);
+ ctx->Current.RasterColor,
+ GL_FALSE, write_depth, write_stencil);
pipe_sampler_view_reference(&sv[0], NULL);
if (num_sampler_view > 1)
pipe_sampler_view_reference(&sv[1], NULL);
+#if !USE_DRAWPIXELS_CACHE
pipe_resource_reference(&pt, NULL);
+#endif
}
@@ -1427,7 +1402,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
struct pipe_sampler_view *sv[2] = { NULL };
struct st_fp_variant *fpv = NULL;
int num_sampler_view = 1;
- GLfloat *color;
enum pipe_format srcFormat;
unsigned srcBind;
GLboolean invertTex = GL_FALSE;
@@ -1436,7 +1410,7 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
st_flush_bitmap_cache(st);
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
if (type == GL_DEPTH_STENCIL) {
/* XXX make this more efficient */
@@ -1469,7 +1443,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
fpv = get_color_fp_variant(st);
rbRead = st_get_color_read_renderbuffer(ctx);
- color = NULL;
driver_fp = fpv->driver_shader;
driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
@@ -1490,7 +1463,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
assert(type == GL_DEPTH);
rbRead = st_renderbuffer(ctx->ReadBuffer->
Attachment[BUFFER_DEPTH].Renderbuffer);
- color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
driver_fp = get_drawpix_z_stencil_program(st, GL_TRUE, GL_FALSE);
driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
@@ -1622,7 +1594,8 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
num_sampler_view,
driver_vp,
driver_fp, fpv,
- color, invertTex, GL_FALSE, GL_FALSE);
+ ctx->Current.Attrib[VERT_ATTRIB_COLOR0],
+ invertTex, GL_FALSE, GL_FALSE);
pipe_resource_reference(&pt, NULL);
pipe_sampler_view_reference(&sv[0], NULL);
diff --git a/src/mesa/state_tracker/st_cb_drawtex.c b/src/mesa/state_tracker/st_cb_drawtex.c
index e6ab77fb521..a7926295277 100644
--- a/src/mesa/state_tracker/st_cb_drawtex.c
+++ b/src/mesa/state_tracker/st_cb_drawtex.c
@@ -49,6 +49,7 @@ struct cached_shader
/**
* Simple linear list cache.
* Most of the time there'll only be one cached shader.
+ * XXX This should be per-st_context state.
*/
static struct cached_shader CachedShaders[MAX_SHADERS];
static GLuint NumCachedShaders = 0;
@@ -99,13 +100,14 @@ lookup_shader(struct pipe_context *pipe,
return CachedShaders[i].handle;
}
+
static void
st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
GLfloat width, GLfloat height)
{
struct st_context *st = ctx->st;
struct pipe_context *pipe = st->pipe;
- struct cso_context *cso = ctx->st->cso_context;
+ struct cso_context *cso = st->cso_context;
struct pipe_resource *vbuffer = NULL;
GLuint i, numTexCoords, numAttribs;
GLboolean emitColor;
@@ -116,7 +118,7 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
st_flush_bitmap_cache(st);
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
/* determine if we need vertex color */
if (ctx->FragmentProgram._Current->Base.InputsRead & VARYING_BIT_COL0)
@@ -150,7 +152,7 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
const GLfloat x0 = x, y0 = y, x1 = x + width, y1 = y + height;
GLfloat *vbuf = NULL;
- GLuint attr;
+ GLuint tex_attr;
u_upload_alloc(st->uploader, 0,
numAttribs * 4 * 4 * sizeof(GLfloat), 4,
@@ -158,12 +160,12 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
if (!vbuffer) {
return;
}
-
+
z = CLAMP(z, 0.0f, 1.0f);
/* positions (in clip coords) */
{
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
+ const struct gl_framebuffer *fb = ctx->DrawBuffer;
const GLfloat fb_width = (GLfloat)fb->Width;
const GLfloat fb_height = (GLfloat)fb->Height;
@@ -190,10 +192,10 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
SET_ATTRIB(3, 1, c[0], c[1], c[2], c[3]);
semantic_names[1] = TGSI_SEMANTIC_COLOR;
semantic_indexes[1] = 0;
- attr = 2;
+ tex_attr = 2;
}
else {
- attr = 1;
+ tex_attr = 1;
}
/* texcoords */
@@ -210,17 +212,17 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
const GLfloat t1 = (obj->CropRect[1] + obj->CropRect[3]) / ht;
/*printf("crop texcoords: %g, %g .. %g, %g\n", s0, t0, s1, t1);*/
- SET_ATTRIB(0, attr, s0, t0, 0.0f, 1.0f); /* lower left */
- SET_ATTRIB(1, attr, s1, t0, 0.0f, 1.0f); /* lower right */
- SET_ATTRIB(2, attr, s1, t1, 0.0f, 1.0f); /* upper right */
- SET_ATTRIB(3, attr, s0, t1, 0.0f, 1.0f); /* upper left */
+ SET_ATTRIB(0, tex_attr, s0, t0, 0.0f, 1.0f); /* lower left */
+ SET_ATTRIB(1, tex_attr, s1, t0, 0.0f, 1.0f); /* lower right */
+ SET_ATTRIB(2, tex_attr, s1, t1, 0.0f, 1.0f); /* upper right */
+ SET_ATTRIB(3, tex_attr, s0, t1, 0.0f, 1.0f); /* upper left */
- semantic_names[attr] = st->needs_texcoord_semantic ?
+ semantic_names[tex_attr] = st->needs_texcoord_semantic ?
TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
/* XXX: should this use semantic index i instead of 0 ? */
- semantic_indexes[attr] = 0;
+ semantic_indexes[tex_attr] = 0;
- attr++;
+ tex_attr++;
}
}
@@ -229,15 +231,14 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
#undef SET_ATTRIB
}
-
- cso_save_viewport(cso);
- cso_save_stream_outputs(cso);
- cso_save_vertex_shader(cso);
- cso_save_tessctrl_shader(cso);
- cso_save_tesseval_shader(cso);
- cso_save_geometry_shader(cso);
- cso_save_vertex_elements(cso);
- cso_save_aux_vertex_buffer_slot(cso);
+ cso_save_state(cso, (CSO_BIT_VIEWPORT |
+ CSO_BIT_STREAM_OUTPUTS |
+ CSO_BIT_VERTEX_SHADER |
+ CSO_BIT_TESSCTRL_SHADER |
+ CSO_BIT_TESSEVAL_SHADER |
+ CSO_BIT_GEOMETRY_SHADER |
+ CSO_BIT_VERTEX_ELEMENTS |
+ CSO_BIT_AUX_VERTEX_BUFFER_SLOT));
{
void *vs = lookup_shader(pipe, numAttribs,
@@ -255,11 +256,11 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
}
cso_set_vertex_elements(cso, numAttribs, velements);
- cso_set_stream_outputs(st->cso_context, 0, NULL, NULL);
+ cso_set_stream_outputs(cso, 0, NULL, NULL);
/* viewport state: viewport matching window dims */
{
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
+ const struct gl_framebuffer *fb = ctx->DrawBuffer;
const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
const GLfloat width = (GLfloat)fb->Width;
const GLfloat height = (GLfloat)fb->Height;
@@ -273,7 +274,6 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
cso_set_viewport(cso, &vp);
}
-
util_draw_vertex_buffer(pipe, cso, vbuffer,
cso_get_aux_vertex_buffer_slot(cso),
offset, /* offset */
@@ -281,18 +281,10 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
4, /* verts */
numAttribs); /* attribs/vert */
-
pipe_resource_reference(&vbuffer, NULL);
/* restore state */
- cso_restore_viewport(cso);
- cso_restore_vertex_shader(cso);
- cso_restore_tessctrl_shader(cso);
- cso_restore_tesseval_shader(cso);
- cso_restore_geometry_shader(cso);
- cso_restore_vertex_elements(cso);
- cso_restore_aux_vertex_buffer_slot(cso);
- cso_restore_stream_outputs(cso);
+ cso_restore_state(cso);
}
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 2a2eb0992c8..82ab914503b 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -44,6 +44,7 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
+#include "st_atom.h"
#include "st_context.h"
#include "st_cb_fbo.h"
#include "st_cb_flush.h"
@@ -711,9 +712,17 @@ st_ReadBuffer(struct gl_context *ctx, GLenum buffer)
(void) buffer;
- /* add the renderbuffer on demand */
- if (fb->_ColorReadBufferIndex >= 0)
+ /* Check if we need to allocate a front color buffer.
+ * Front buffers are often allocated on demand (other color buffers are
+ * always allocated in advance).
+ */
+ if ((fb->_ColorReadBufferIndex == BUFFER_FRONT_LEFT ||
+ fb->_ColorReadBufferIndex == BUFFER_FRONT_RIGHT) &&
+ fb->Attachment[fb->_ColorReadBufferIndex].Type == GL_NONE) {
+ /* add the buffer */
st_manager_add_color_renderbuffer(st, fb, fb->_ColorReadBufferIndex);
+ st_validate_state(st, ST_PIPELINE_RENDER);
+ }
}
diff --git a/src/mesa/state_tracker/st_cb_msaa.c b/src/mesa/state_tracker/st_cb_msaa.c
index e9955b62b8f..d581f2121b0 100644
--- a/src/mesa/state_tracker/st_cb_msaa.c
+++ b/src/mesa/state_tracker/st_cb_msaa.c
@@ -44,7 +44,7 @@ st_GetSamplePosition(struct gl_context *ctx,
{
struct st_context *st = st_context(ctx);
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
if (st->pipe->get_sample_position)
st->pipe->get_sample_position(st->pipe, (unsigned) fb->Visual.samples,
diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c
index ca493d84715..27cc0f3d154 100644
--- a/src/mesa/state_tracker/st_cb_program.c
+++ b/src/mesa/state_tracker/st_cb_program.c
@@ -74,6 +74,9 @@ st_bind_program(struct gl_context *ctx, GLenum target, struct gl_program *prog)
case GL_TESS_EVALUATION_PROGRAM_NV:
st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
break;
+ case GL_COMPUTE_PROGRAM_NV:
+ st->dirty_cp.st |= ST_NEW_COMPUTE_PROGRAM;
+ break;
}
}
@@ -92,6 +95,7 @@ st_use_program(struct gl_context *ctx, struct gl_shader_program *shProg)
st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM;
st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM;
st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
+ st->dirty_cp.st |= ST_NEW_COMPUTE_PROGRAM;
}
@@ -123,6 +127,10 @@ st_new_program(struct gl_context *ctx, GLenum target, GLuint id)
struct st_tesseval_program *prog = ST_CALLOC_STRUCT(st_tesseval_program);
return _mesa_init_gl_program(&prog->Base.Base, target, id);
}
+ case GL_COMPUTE_PROGRAM_NV: {
+ struct st_compute_program *prog = ST_CALLOC_STRUCT(st_compute_program);
+ return _mesa_init_gl_program(&prog->Base.Base, target, id);
+ }
default:
assert(0);
return NULL;
@@ -195,6 +203,17 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
free_glsl_to_tgsi_visitor(sttep->glsl_to_tgsi);
}
break;
+ case GL_COMPUTE_PROGRAM_NV:
+ {
+ struct st_compute_program *stcp =
+ (struct st_compute_program *) prog;
+
+ st_release_cp_variants(st, stcp);
+
+ if (stcp->glsl_to_tgsi)
+ free_glsl_to_tgsi_visitor(stcp->glsl_to_tgsi);
+ }
+ break;
default:
assert(0); /* problem */
}
@@ -272,6 +291,17 @@ st_program_string_notify( struct gl_context *ctx,
if (st->tep == sttep)
st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
}
+ else if (target == GL_COMPUTE_PROGRAM_NV) {
+ struct st_compute_program *stcp =
+ (struct st_compute_program *) prog;
+
+ st_release_cp_variants(st, stcp);
+ if (!st_translate_compute_program(st, stcp))
+ return false;
+
+ if (st->cp == stcp)
+ st->dirty_cp.st |= ST_NEW_COMPUTE_PROGRAM;
+ }
if (ST_DEBUG & DEBUG_PRECOMPILE ||
st->shader_has_one_variant[stage])
diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c
index 747b41464ae..eec72f8a412 100644
--- a/src/mesa/state_tracker/st_cb_rasterpos.c
+++ b/src/mesa/state_tracker/st_cb_rasterpos.c
@@ -248,7 +248,7 @@ st_RasterPos(struct gl_context *ctx, const GLfloat v[4])
draw_set_rasterize_stage(st->draw, st->rastpos_stage);
/* make sure everything's up to date */
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
/* This will get set only if rastpos_point(), above, gets called */
ctx->Current.RasterPosValid = GL_FALSE;
diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c
index bb36e6969d6..5153c4bbba1 100644
--- a/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/src/mesa/state_tracker/st_cb_readpixels.c
@@ -81,7 +81,7 @@ needs_integer_signed_unsigned_conversion(const struct gl_context *ctx,
* we do here should be free in such cases.
*/
static void
-st_readpixels(struct gl_context *ctx, GLint x, GLint y,
+st_ReadPixels(struct gl_context *ctx, GLint x, GLint y,
GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack,
@@ -104,7 +104,7 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y,
/* Validate state (to be sure we have up-to-date framebuffer surfaces)
* and flush the bitmap cache prior to reading. */
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
st_flush_bitmap_cache(st);
if (!st->prefer_blit_based_texture_transfer) {
@@ -257,5 +257,5 @@ fallback:
void st_init_readpixels_functions(struct dd_function_table *functions)
{
- functions->ReadPixels = st_readpixels;
+ functions->ReadPixels = st_ReadPixels;
}
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index d53126a9441..cfec627f10c 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1272,10 +1272,11 @@ create_pbo_upload_fs(struct st_context *st)
ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
}
+ /* temp0.w = 0 */
+ ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_W), ureg_imm1u(ureg, 0));
+
/* out = txf(sampler, temp0.x) */
- ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER,
- ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X),
- sampler);
+ ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER, ureg_src(temp0), sampler);
ureg_release_temporary(ureg, temp0);
@@ -1297,6 +1298,7 @@ try_pbo_upload_common(struct gl_context *ctx,
unsigned image_height)
{
struct st_context *st = st_context(ctx);
+ struct cso_context *cso = st->cso_context;
struct pipe_context *pipe = st->pipe;
unsigned depth = surface->u.tex.last_layer - surface->u.tex.first_layer + 1;
unsigned skip_pixels = 0;
@@ -1333,6 +1335,20 @@ try_pbo_upload_common(struct gl_context *ctx,
return false;
}
+ cso_save_state(cso, (CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
+ CSO_BIT_FRAGMENT_SAMPLERS |
+ CSO_BIT_VERTEX_ELEMENTS |
+ CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+ CSO_BIT_FRAMEBUFFER |
+ CSO_BIT_VIEWPORT |
+ CSO_BIT_BLEND |
+ CSO_BIT_DEPTH_STENCIL_ALPHA |
+ CSO_BIT_RASTERIZER |
+ CSO_BIT_STREAM_OUTPUTS |
+ CSO_BITS_ALL_SHADERS));
+ cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
+
+
/* Set up the sampler_view */
{
unsigned first_element = buf_offset;
@@ -1340,14 +1356,17 @@ try_pbo_upload_common(struct gl_context *ctx,
+ (upload_height - 1 + (depth - 1) * image_height) * stride;
struct pipe_sampler_view templ;
struct pipe_sampler_view *sampler_view;
+ struct pipe_sampler_state sampler = {0};
+ const struct pipe_sampler_state *samplers[1] = {&sampler};
/* This should be ensured by Mesa before calling our callbacks */
assert((last_element + 1) * bytes_per_pixel <= buffer->width0);
if (last_element - first_element > ctx->Const.MaxTextureBufferSize - 1)
- return false;
+ goto fail;
memset(&templ, 0, sizeof(templ));
+ templ.target = PIPE_BUFFER;
templ.format = src_format;
templ.u.buf.first_element = first_element;
templ.u.buf.last_element = last_element;
@@ -1358,13 +1377,13 @@ try_pbo_upload_common(struct gl_context *ctx,
sampler_view = pipe->create_sampler_view(pipe, buffer, &templ);
if (sampler_view == NULL)
- return false;
+ goto fail;
- cso_save_fragment_sampler_views(st->cso_context);
- cso_set_sampler_views(st->cso_context, PIPE_SHADER_FRAGMENT, 1,
- &sampler_view);
+ cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 1, &sampler_view);
pipe_sampler_view_reference(&sampler_view, NULL);
+
+ cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 1, samplers);
}
/* Upload vertices */
@@ -1386,7 +1405,7 @@ try_pbo_upload_common(struct gl_context *ctx,
u_upload_alloc(st->uploader, 0, 8 * sizeof(float), 4,
&vbo.buffer_offset, &vbo.buffer, (void **) &verts);
if (!verts)
- goto fail_vertex_upload;
+ goto fail;
verts[0] = x0;
verts[1] = y0;
@@ -1401,30 +1420,28 @@ try_pbo_upload_common(struct gl_context *ctx,
velem.src_offset = 0;
velem.instance_divisor = 0;
- velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(st->cso_context);
+ velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(cso);
velem.src_format = PIPE_FORMAT_R32G32_FLOAT;
- cso_save_vertex_elements(st->cso_context);
- cso_set_vertex_elements(st->cso_context, 1, &velem);
+ cso_set_vertex_elements(cso, 1, &velem);
- cso_save_aux_vertex_buffer_slot(st->cso_context);
- cso_set_vertex_buffers(st->cso_context, velem.vertex_buffer_index,
- 1, &vbo);
+ cso_set_vertex_buffers(cso, velem.vertex_buffer_index, 1, &vbo);
pipe_resource_reference(&vbo.buffer, NULL);
}
/* Upload constants */
+ /* Note: the user buffer must be valid until draw time */
+ struct {
+ int32_t xoffset;
+ int32_t yoffset;
+ int32_t stride;
+ int32_t image_size;
+ } constants;
+
{
struct pipe_constant_buffer cb;
- struct {
- int32_t xoffset;
- int32_t yoffset;
- int32_t stride;
- int32_t image_size;
- } constants;
-
constants.xoffset = -xoffset + skip_pixels;
constants.yoffset = -yoffset;
constants.stride = stride;
@@ -1434,10 +1451,10 @@ try_pbo_upload_common(struct gl_context *ctx,
cb.buffer = NULL;
cb.user_buffer = NULL;
u_upload_data(st->constbuf_uploader, 0, sizeof(constants),
- st->ctx->Const.UniformBufferOffsetAlignment,
+ ctx->Const.UniformBufferOffsetAlignment,
&constants, &cb.buffer_offset, &cb.buffer);
if (!cb.buffer)
- goto fail_constant_upload;
+ goto fail;
u_upload_unmap(st->constbuf_uploader);
} else {
@@ -1447,8 +1464,7 @@ try_pbo_upload_common(struct gl_context *ctx,
}
cb.buffer_size = sizeof(constants);
- cso_save_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
- cso_set_constant_buffer(st->cso_context, PIPE_SHADER_FRAGMENT, 0, &cb);
+ cso_set_constant_buffer(cso, PIPE_SHADER_FRAGMENT, 0, &cb);
pipe_resource_reference(&cb.buffer, NULL);
}
@@ -1462,80 +1478,52 @@ try_pbo_upload_common(struct gl_context *ctx,
fb.nr_cbufs = 1;
pipe_surface_reference(&fb.cbufs[0], surface);
- cso_save_framebuffer(st->cso_context);
- cso_set_framebuffer(st->cso_context, &fb);
+ cso_set_framebuffer(cso, &fb);
pipe_surface_reference(&fb.cbufs[0], NULL);
}
- /* Viewport state */
- {
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * surface->width;
- vp.scale[1] = 0.5f * surface->height;
- vp.scale[2] = 1.0f;
- vp.translate[0] = 0.5f * surface->width;
- vp.translate[1] = 0.5f * surface->height;
- vp.translate[2] = 0.0f;
-
- cso_save_viewport(st->cso_context);
- cso_set_viewport(st->cso_context, &vp);
- }
+ cso_set_viewport_dims(cso, surface->width, surface->height, FALSE);
/* Blend state */
- cso_save_blend(st->cso_context);
- cso_set_blend(st->cso_context, &st->pbo_upload.blend);
+ cso_set_blend(cso, &st->pbo_upload.blend);
+
+ /* Depth/stencil/alpha state */
+ {
+ struct pipe_depth_stencil_alpha_state dsa;
+ memset(&dsa, 0, sizeof(dsa));
+ cso_set_depth_stencil_alpha(cso, &dsa);
+ }
/* Rasterizer state */
- cso_save_rasterizer(st->cso_context);
- cso_set_rasterizer(st->cso_context, &st->pbo_upload.raster);
+ cso_set_rasterizer(cso, &st->pbo_upload.raster);
/* Set up the shaders */
- cso_save_vertex_shader(st->cso_context);
- cso_set_vertex_shader_handle(st->cso_context, st->pbo_upload.vs);
+ cso_set_vertex_shader_handle(cso, st->pbo_upload.vs);
- cso_save_geometry_shader(st->cso_context);
- cso_set_geometry_shader_handle(st->cso_context,
- depth != 1 ? st->pbo_upload.gs : NULL);
+ cso_set_geometry_shader_handle(cso, depth != 1 ? st->pbo_upload.gs : NULL);
- cso_save_tessctrl_shader(st->cso_context);
- cso_set_tessctrl_shader_handle(st->cso_context, NULL);
+ cso_set_tessctrl_shader_handle(cso, NULL);
- cso_save_tesseval_shader(st->cso_context);
- cso_set_tesseval_shader_handle(st->cso_context, NULL);
+ cso_set_tesseval_shader_handle(cso, NULL);
- cso_save_fragment_shader(st->cso_context);
- cso_set_fragment_shader_handle(st->cso_context, st->pbo_upload.fs);
+ cso_set_fragment_shader_handle(cso, st->pbo_upload.fs);
/* Disable stream output */
- cso_save_stream_outputs(st->cso_context);
- cso_set_stream_outputs(st->cso_context, 0, NULL, 0);
+ cso_set_stream_outputs(cso, 0, NULL, 0);
if (depth == 1) {
- cso_draw_arrays(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
+ cso_draw_arrays(cso, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
} else {
- cso_draw_arrays_instanced(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP,
+ cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_STRIP,
0, 4, 0, depth);
}
success = true;
- cso_restore_framebuffer(st->cso_context);
- cso_restore_viewport(st->cso_context);
- cso_restore_blend(st->cso_context);
- cso_restore_rasterizer(st->cso_context);
- cso_restore_vertex_shader(st->cso_context);
- cso_restore_geometry_shader(st->cso_context);
- cso_restore_tessctrl_shader(st->cso_context);
- cso_restore_tesseval_shader(st->cso_context);
- cso_restore_fragment_shader(st->cso_context);
- cso_restore_stream_outputs(st->cso_context);
- cso_restore_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
-fail_constant_upload:
- cso_restore_vertex_elements(st->cso_context);
- cso_restore_aux_vertex_buffer_slot(st->cso_context);
-fail_vertex_upload:
- cso_restore_fragment_sampler_views(st->cso_context);
+fail:
+ cso_restore_state(cso);
+ cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
return success;
}
@@ -2752,7 +2740,7 @@ st_finalize_texture(struct gl_context *ctx,
{
struct st_context *st = st_context(ctx);
struct st_texture_object *stObj = st_texture_object(tObj);
- const GLuint nr_faces = (stObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+ const GLuint nr_faces = _mesa_num_tex_faces(stObj->base.Target);
GLuint face;
const struct st_texture_image *firstImage;
enum pipe_format firstImageFormat;
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index 9016846b148..e3ddee660f7 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -43,6 +43,7 @@
#include "st_cb_blit.h"
#include "st_cb_bufferobjects.h"
#include "st_cb_clear.h"
+#include "st_cb_compute.h"
#include "st_cb_condrender.h"
#include "st_cb_copyimage.h"
#include "st_cb_drawpixels.h"
@@ -138,8 +139,11 @@ void st_invalidate_state(struct gl_context * ctx, GLbitfield new_state)
st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
}
+ /* Invalidate render and compute pipelines. */
st->dirty.mesa |= new_state;
st->dirty.st |= ST_NEW_MESA;
+ st->dirty_cp.mesa |= new_state;
+ st->dirty_cp.st |= ST_NEW_MESA;
/* This is the only core Mesa module we depend upon.
* No longer use swrast, swsetup, tnl.
@@ -182,6 +186,10 @@ st_destroy_context_priv(struct st_context *st)
u_upload_destroy(st->constbuf_uploader);
}
+ /* free glDrawPixels cache data */
+ free(st->drawpix_cache.image);
+ pipe_resource_reference(&st->drawpix_cache.texture, NULL);
+
cso_destroy_context(st->cso_context);
free( st );
}
@@ -208,8 +216,11 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
/* state tracker needs the VBO module */
_vbo_CreateContext(ctx);
+ /* Initialize render and compute pipelines flags */
st->dirty.mesa = ~0;
st->dirty.st = ~0;
+ st->dirty_cp.mesa = ~0;
+ st->dirty_cp.st = ~0;
/* Create upload manager for vertex data for glBitmap, glDrawPixels,
* glClear, etc.
@@ -241,16 +252,30 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
else
st->internal_target = PIPE_TEXTURE_RECT;
- /* Vertex element objects used for drawing rectangles for glBitmap,
- * glDrawPixels, glClear, etc.
+ /* Setup vertex element info for 'struct st_util_vertex'.
*/
- for (i = 0; i < ARRAY_SIZE(st->velems_util_draw); i++) {
- memset(&st->velems_util_draw[i], 0, sizeof(struct pipe_vertex_element));
- st->velems_util_draw[i].src_offset = i * 4 * sizeof(float);
- st->velems_util_draw[i].instance_divisor = 0;
- st->velems_util_draw[i].vertex_buffer_index =
- cso_get_aux_vertex_buffer_slot(st->cso_context);
- st->velems_util_draw[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ {
+ const unsigned slot = cso_get_aux_vertex_buffer_slot(st->cso_context);
+
+ /* If this assertion ever fails all state tracker calls to
+ * cso_get_aux_vertex_buffer_slot() should be audited. This
+ * particular call would have to be moved to just before each
+ * drawing call.
+ */
+ assert(slot == 0);
+
+ STATIC_ASSERT(sizeof(struct st_util_vertex) == 9 * sizeof(float));
+
+ memset(&st->util_velems, 0, sizeof(st->util_velems));
+ st->util_velems[0].src_offset = 0;
+ st->util_velems[0].vertex_buffer_index = slot;
+ st->util_velems[0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ st->util_velems[1].src_offset = 3 * sizeof(float);
+ st->util_velems[1].vertex_buffer_index = slot;
+ st->util_velems[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ st->util_velems[2].src_offset = 7 * sizeof(float);
+ st->util_velems[2].vertex_buffer_index = slot;
+ st->util_velems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
}
/* we want all vertex data to be placed in buffer objects */
@@ -262,9 +287,9 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
/* Need these flags:
*/
- st->ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+ ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
- st->ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
+ ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
st->has_stencil_export =
screen->get_param(screen, PIPE_CAP_SHADER_STENCIL_EXPORT);
@@ -328,8 +353,8 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
/* called after _mesa_create_context/_mesa_init_point, fix default user
* settable max point size up
*/
- st->ctx->Point.MaxSize = MAX2(ctx->Const.MaxPointSize,
- ctx->Const.MaxPointSizeAA);
+ ctx->Point.MaxSize = MAX2(ctx->Const.MaxPointSize,
+ ctx->Const.MaxPointSizeAA);
/* For vertex shaders, make sure not to emit saturate when SM 3.0 is not supported */
ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoSat = !st->has_shader_model3;
@@ -377,6 +402,7 @@ static void st_init_driver_flags(struct gl_driver_flags *f)
f->NewTextureBuffer = ST_NEW_SAMPLER_VIEWS;
f->NewAtomicBuffer = ST_NEW_ATOMIC_BUFFER;
f->NewShaderStorageBuffer = ST_NEW_STORAGE_BUFFER;
+ f->NewImageUnits = ST_NEW_IMAGE_UNITS;
}
struct st_context *st_create_context(gl_api api, struct pipe_context *pipe,
@@ -440,6 +466,7 @@ void st_destroy_context( struct st_context *st )
st_reference_vertprog(st, &st->vp, NULL);
st_reference_tesscprog(st, &st->tcp, NULL);
st_reference_tesseprog(st, &st->tep, NULL);
+ st_reference_compprog(st, &st->cp, NULL);
/* release framebuffer surfaces */
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
@@ -449,7 +476,7 @@ void st_destroy_context( struct st_context *st )
pipe_sampler_view_reference(&st->pixel_xfer.pixelmap_sampler_view, NULL);
pipe_resource_reference(&st->pixel_xfer.pixelmap_texture, NULL);
- _vbo_DestroyContext(st->ctx);
+ _vbo_DestroyContext(ctx);
st_destroy_program_variants(st);
@@ -503,6 +530,7 @@ void st_init_driver_functions(struct pipe_screen *screen,
st_init_flush_functions(screen, functions);
st_init_string_functions(functions);
st_init_viewport_functions(functions);
+ st_init_compute_functions(functions);
st_init_xformfb_functions(functions);
st_init_syncobj_functions(functions);
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index 9a80f4bae70..f960c64cbe8 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -64,6 +64,8 @@ struct u_upload_mgr;
#define ST_NEW_SAMPLER_VIEWS (1 << 11)
#define ST_NEW_ATOMIC_BUFFER (1 << 12)
#define ST_NEW_STORAGE_BUFFER (1 << 13)
+#define ST_NEW_COMPUTE_PROGRAM (1 << 14)
+#define ST_NEW_IMAGE_UNITS (1 << 15)
struct st_state_flags {
@@ -78,6 +80,23 @@ struct st_tracked_state {
};
+/**
+ * Enumeration of state tracker pipelines.
+ */
+enum st_pipeline {
+ ST_PIPELINE_RENDER,
+ ST_PIPELINE_COMPUTE,
+};
+
+
+/** For drawing quads for glClear, glDraw/CopyPixels, glBitmap, etc. */
+struct st_util_vertex
+{
+ float x, y, z;
+ float r, g, b, a;
+ float s, t;
+};
+
struct st_context
{
@@ -153,6 +172,7 @@ struct st_context
char renderer[100];
struct st_state_flags dirty;
+ struct st_state_flags dirty_cp;
GLboolean vertdata_edgeflags;
GLboolean edgeflag_culls_prims;
@@ -165,12 +185,14 @@ struct st_context
struct st_geometry_program *gp; /**< Currently bound geometry program */
struct st_tessctrl_program *tcp; /**< Currently bound tess control program */
struct st_tesseval_program *tep; /**< Currently bound tess eval program */
+ struct st_compute_program *cp; /**< Currently bound compute program */
struct st_vp_variant *vp_variant;
struct st_fp_variant *fp_variant;
struct st_basic_variant *gp_variant;
struct st_basic_variant *tcp_variant;
struct st_basic_variant *tep_variant;
+ struct st_basic_variant *cp_variant;
struct gl_texture_object *default_texture;
@@ -183,6 +205,7 @@ struct st_context
struct {
struct pipe_rasterizer_state rasterizer;
struct pipe_sampler_state sampler;
+ struct pipe_sampler_state atlas_sampler;
enum pipe_format tex_format;
void *vs;
struct bitmap_cache *cache;
@@ -194,6 +217,14 @@ struct st_context
void *vert_shaders[2]; /**< ureg shaders */
} drawpix;
+ struct {
+ GLsizei width, height;
+ GLenum format, type;
+ const void *user_pointer; /**< Last user 'pixels' pointer */
+ void *image; /**< Copy of the glDrawPixels image data */
+ struct pipe_resource *texture;
+ } drawpix_cache;
+
/** for glClear */
struct {
struct pipe_rasterizer_state raster;
@@ -217,8 +248,8 @@ struct st_context
bool use_gs;
} pbo_upload;
- /** used for anything using util_draw_vertex_buffer */
- struct pipe_vertex_element velems_util_draw[3];
+ /** for drawing with st_util_vertex */
+ struct pipe_vertex_element util_velems[3];
void *passthrough_fs; /**< simple pass-through frag shader */
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 10e294cd147..2de6620602d 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -60,7 +60,7 @@
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "util/u_prim.h"
-#include "util/u_draw_quad.h"
+#include "util/u_draw.h"
#include "util/u_upload_mgr.h"
#include "draw/draw_context.h"
#include "cso_cache/cso_context.h"
@@ -202,7 +202,7 @@ st_draw_vbo(struct gl_context *ctx,
/* Validate state. */
if (st->dirty.st || ctx->NewDriverState) {
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
#if 0
if (MESA_VERBOSE & VERBOSE_GLSL) {
@@ -315,7 +315,7 @@ st_indirect_draw_vbo(struct gl_context *ctx,
/* Validate state. */
if (st->dirty.st || ctx->NewDriverState) {
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
}
if (st->vertex_array_out_of_memory) {
@@ -398,3 +398,93 @@ st_destroy_draw(struct st_context *st)
{
draw_destroy(st->draw);
}
+
+
+/**
+ * Draw a quad with given position, texcoords and color.
+ */
+bool
+st_draw_quad(struct st_context *st,
+ float x0, float y0, float x1, float y1, float z,
+ float s0, float t0, float s1, float t1,
+ const float *color,
+ unsigned num_instances)
+{
+ struct pipe_vertex_buffer vb = {0};
+ struct st_util_vertex *verts;
+
+ vb.stride = sizeof(struct st_util_vertex);
+
+ u_upload_alloc(st->uploader, 0, 4 * sizeof(struct st_util_vertex), 4,
+ &vb.buffer_offset, &vb.buffer, (void **) &verts);
+ if (!vb.buffer) {
+ return false;
+ }
+
+ /* lower-left */
+ verts[0].x = x0;
+ verts[0].y = y1;
+ verts[0].z = z;
+ verts[0].r = color[0];
+ verts[0].g = color[1];
+ verts[0].b = color[2];
+ verts[0].a = color[3];
+ verts[0].s = s0;
+ verts[0].t = t0;
+
+ /* lower-right */
+ verts[1].x = x1;
+ verts[1].y = y1;
+ verts[1].z = z;
+ verts[1].r = color[0];
+ verts[1].g = color[1];
+ verts[1].b = color[2];
+ verts[1].a = color[3];
+ verts[1].s = s1;
+ verts[1].t = t0;
+
+ /* upper-right */
+ verts[2].x = x1;
+ verts[2].y = y0;
+ verts[2].z = z;
+ verts[2].r = color[0];
+ verts[2].g = color[1];
+ verts[2].b = color[2];
+ verts[2].a = color[3];
+ verts[2].s = s1;
+ verts[2].t = t1;
+
+ /* upper-left */
+ verts[3].x = x0;
+ verts[3].y = y0;
+ verts[3].z = z;
+ verts[3].r = color[0];
+ verts[3].g = color[1];
+ verts[3].b = color[2];
+ verts[3].a = color[3];
+ verts[3].s = s0;
+ verts[3].t = t1;
+
+ u_upload_unmap(st->uploader);
+
+ /* At the time of writing, cso_get_aux_vertex_buffer_slot() always returns
+ * zero. If that ever changes we need to audit the calls to that function
+ * and make sure the slot number is used consistently everywhere.
+ */
+ assert(cso_get_aux_vertex_buffer_slot(st->cso_context) == 0);
+
+ cso_set_vertex_buffers(st->cso_context,
+ cso_get_aux_vertex_buffer_slot(st->cso_context),
+ 1, &vb);
+
+ if (num_instances > 1) {
+ cso_draw_arrays_instanced(st->cso_context, PIPE_PRIM_TRIANGLE_FAN, 0, 4,
+ 0, num_instances);
+ } else {
+ cso_draw_arrays(st->cso_context, PIPE_PRIM_TRIANGLE_FAN, 0, 4);
+ }
+
+ pipe_resource_reference(&vb.buffer, NULL);
+
+ return true;
+}
diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h
index a973c8a4a5d..d85c3b7facd 100644
--- a/src/mesa/state_tracker/st_draw.h
+++ b/src/mesa/state_tracker/st_draw.h
@@ -85,4 +85,11 @@ pointer_to_offset(const void *ptr)
}
+bool
+st_draw_quad(struct st_context *st,
+ float x0, float y0, float x1, float y1, float z,
+ float s0, float t0, float s1, float t1,
+ const float *color,
+ unsigned num_instances);
+
#endif
diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c
index b6e6dea5b27..9f48945d74d 100644
--- a/src/mesa/state_tracker/st_draw_feedback.c
+++ b/src/mesa/state_tracker/st_draw_feedback.c
@@ -140,7 +140,7 @@ st_feedback_draw_vbo(struct gl_context *ctx,
st_flush_bitmap_cache(st);
- st_validate_state(st);
+ st_validate_state(st, ST_PIPELINE_RENDER);
if (!index_bounds_valid)
vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims);
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index feabe6290eb..24c64447f44 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -75,6 +75,7 @@ static int _clamp(int a, int min, int max)
void st_init_limits(struct pipe_screen *screen,
struct gl_constants *c, struct gl_extensions *extensions)
{
+ int supported_irs;
unsigned sh;
boolean can_ubo = TRUE;
@@ -174,9 +175,19 @@ void st_init_limits(struct pipe_screen *screen,
pc = &c->Program[MESA_SHADER_TESS_EVAL];
options = &c->ShaderCompilerOptions[MESA_SHADER_TESS_EVAL];
break;
+ case PIPE_SHADER_COMPUTE:
+ pc = &c->Program[MESA_SHADER_COMPUTE];
+ options = &c->ShaderCompilerOptions[MESA_SHADER_COMPUTE];
+
+ if (!screen->get_param(screen, PIPE_CAP_COMPUTE))
+ continue;
+ supported_irs =
+ screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_SUPPORTED_IRS);
+ if (!(supported_irs & (1 << PIPE_SHADER_IR_TGSI)))
+ continue;
+ break;
default:
- /* compute shader, etc. */
- continue;
+ assert(0);
}
pc->MaxTextureImageUnits =
@@ -223,6 +234,9 @@ void st_init_limits(struct pipe_screen *screen,
screen, sh, PIPE_SHADER_CAP_MAX_SHADER_BUFFERS) / 2;
pc->MaxShaderStorageBlocks = pc->MaxAtomicBuffers;
+ pc->MaxImageUniforms = screen->get_shader_param(
+ screen, sh, PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
+
/* Gallium doesn't really care about local vs. env parameters so use the
* same limits.
*/
@@ -261,6 +275,9 @@ void st_init_limits(struct pipe_screen *screen,
options->LowerClipDistance = true;
options->LowerBufferInterfaceBlocks = true;
+
+ if (sh == PIPE_SHADER_COMPUTE)
+ options->LowerShaderSharedVariables = true;
}
c->LowerTessLevel = true;
@@ -270,7 +287,8 @@ void st_init_limits(struct pipe_screen *screen,
c->Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits +
c->Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits +
c->Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
- c->Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits,
+ c->Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
+ c->Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits,
MAX_COMBINED_TEXTURE_IMAGE_UNITS);
/* This depends on program constants. */
@@ -330,7 +348,8 @@ void st_init_limits(struct pipe_screen *screen,
c->Program[MESA_SHADER_TESS_CTRL].MaxUniformBlocks +
c->Program[MESA_SHADER_TESS_EVAL].MaxUniformBlocks +
c->Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks +
- c->Program[MESA_SHADER_FRAGMENT].MaxUniformBlocks;
+ c->Program[MESA_SHADER_FRAGMENT].MaxUniformBlocks +
+ c->Program[MESA_SHADER_COMPUTE].MaxUniformBlocks;
assert(c->MaxCombinedUniformBlocks <= MAX_COMBINED_UNIFORM_BUFFERS);
}
@@ -363,6 +382,21 @@ void st_init_limits(struct pipe_screen *screen,
c->MaxShaderStorageBlockSize = 1 << 27;
extensions->ARB_shader_storage_buffer_object = GL_TRUE;
}
+
+ c->MaxCombinedImageUniforms =
+ c->Program[MESA_SHADER_VERTEX].MaxImageUniforms +
+ c->Program[MESA_SHADER_TESS_CTRL].MaxImageUniforms +
+ c->Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms +
+ c->Program[MESA_SHADER_GEOMETRY].MaxImageUniforms +
+ c->Program[MESA_SHADER_FRAGMENT].MaxImageUniforms +
+ c->Program[MESA_SHADER_COMPUTE].MaxImageUniforms;
+ c->MaxCombinedShaderOutputResources += c->MaxCombinedImageUniforms;
+ c->MaxImageUnits = MAX_IMAGE_UNITS;
+ c->MaxImageSamples = 0; /* XXX */
+ if (c->MaxCombinedImageUniforms) {
+ extensions->ARB_shader_image_load_store = GL_TRUE;
+ extensions->ARB_shader_image_size = GL_TRUE;
+ }
}
@@ -1012,4 +1046,31 @@ void st_init_extensions(struct pipe_screen *screen,
if ((ST_DEBUG & DEBUG_GREMEDY) &&
screen->get_param(screen, PIPE_CAP_STRING_MARKER))
extensions->GREMEDY_string_marker = GL_TRUE;
+
+ if (screen->get_param(screen, PIPE_CAP_COMPUTE)) {
+ int compute_supported_irs =
+ screen->get_shader_param(screen, PIPE_SHADER_COMPUTE,
+ PIPE_SHADER_CAP_SUPPORTED_IRS);
+ if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
+ uint64_t grid_size[3], block_size[3];
+
+ screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GRID_SIZE,
+ grid_size);
+ screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE,
+ block_size);
+ screen->get_compute_param(screen,
+ PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
+ &consts->MaxComputeWorkGroupInvocations);
+ screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
+ &consts->MaxComputeSharedMemorySize);
+
+ for (i = 0; i < 3; i++) {
+ consts->MaxComputeWorkGroupCount[i] = grid_size[i];
+ consts->MaxComputeWorkGroupSize[i] = block_size[i];
+ }
+ /* XXX: ARB_compute_shader is not enabled by default because images
+ * support is still not implemented yet. */
+ /* extensions->ARB_compute_shader = true; */
+ }
+ }
}
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index 2b92bade440..82bf3a185ad 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -1484,6 +1484,48 @@ static const struct format_mapping format_map[] = {
{ PIPE_FORMAT_ETC1_RGB8, 0 }
},
+ /* ETC2 */
+ {
+ { GL_COMPRESSED_RGB8_ETC2, 0 },
+ { PIPE_FORMAT_ETC2_RGB8, 0 }
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ETC2, 0 },
+ { PIPE_FORMAT_ETC2_SRGB8, 0 }
+ },
+ {
+ { GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2, 0 },
+ { PIPE_FORMAT_ETC2_RGB8A1, 0 }
+ },
+ {
+ { GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2, 0 },
+ { PIPE_FORMAT_ETC2_SRGB8A1, 0 }
+ },
+ {
+ { GL_COMPRESSED_RGBA8_ETC2_EAC, 0 },
+ { PIPE_FORMAT_ETC2_RGBA8, 0 }
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC, 0 },
+ { PIPE_FORMAT_ETC2_SRGBA8, 0 }
+ },
+ {
+ { GL_COMPRESSED_R11_EAC, 0 },
+ { PIPE_FORMAT_ETC2_R11_UNORM, 0 }
+ },
+ {
+ { GL_COMPRESSED_SIGNED_R11_EAC, 0 },
+ { PIPE_FORMAT_ETC2_R11_SNORM, 0 }
+ },
+ {
+ { GL_COMPRESSED_RG11_EAC, 0 },
+ { PIPE_FORMAT_ETC2_RG11_UNORM, 0 }
+ },
+ {
+ { GL_COMPRESSED_SIGNED_RG11_EAC, 0 },
+ { PIPE_FORMAT_ETC2_RG11_SNORM, 0 }
+ },
+
/* BPTC */
{
{ GL_COMPRESSED_RGBA_BPTC_UNORM, 0 },
diff --git a/src/mesa/state_tracker/st_format.h b/src/mesa/state_tracker/st_format.h
index 90e00e8ebf6..3e10aa64bc6 100644
--- a/src/mesa/state_tracker/st_format.h
+++ b/src/mesa/state_tracker/st_format.h
@@ -36,6 +36,10 @@
#include "pipe/p_defines.h"
#include "pipe/p_format.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct gl_context;
struct pipe_screen;
@@ -77,4 +81,8 @@ st_translate_color(const union gl_color_union *colorIn,
union pipe_color_union *colorOut,
GLenum baseFormat, GLboolean is_integer);
+#ifdef __cplusplus
+}
+#endif
+
#endif /* ST_FORMAT_H */
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 2ad91ecf4df..943582d447a 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -40,6 +40,7 @@
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "main/shaderapi.h"
+#include "main/shaderimage.h"
#include "program/prog_instruction.h"
#include "pipe/p_context.h"
@@ -50,6 +51,7 @@
#include "util/u_memory.h"
#include "st_program.h"
#include "st_mesa_to_tgsi.h"
+#include "st_format.h"
#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \
@@ -262,6 +264,7 @@ public:
int tex_target; /**< One of TEXTURE_*_INDEX */
glsl_base_type tex_type;
GLboolean tex_shadow;
+ unsigned image_format;
st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
unsigned tex_offset_num_offset;
@@ -395,6 +398,9 @@ public:
glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */
int buffers_used;
+ int images_used;
+ int image_targets[PIPE_MAX_SHADER_IMAGES];
+ unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
bool indirect_addr_consts;
int wpos_transform_const;
@@ -402,6 +408,7 @@ public:
bool native_integers;
bool have_sqrt;
bool have_fma;
+ bool use_shared_memory;
variable_storage *find_variable_storage(ir_variable *var);
@@ -451,6 +458,8 @@ public:
void visit_atomic_counter_intrinsic(ir_call *);
void visit_ssbo_intrinsic(ir_call *);
void visit_membar_intrinsic(ir_call *);
+ void visit_shared_intrinsic(ir_call *);
+ void visit_image_intrinsic(ir_call *);
st_src_reg result;
@@ -1214,6 +1223,7 @@ attrib_type_size(const struct glsl_type *type, bool is_vs_input)
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
+ case GLSL_TYPE_FUNCTION:
assert(!"Invalid type in type_size");
break;
}
@@ -1969,6 +1979,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
case ir_unop_u2i:
/* Converting between signed and unsigned integers is a no-op. */
result_src = op[0];
+ result_src.type = result_dst.type;
break;
case ir_unop_b2i:
if (native_integers) {
@@ -3341,6 +3352,239 @@ glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir)
}
void
+glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir)
+{
+ const char *callee = ir->callee->function_name();
+ exec_node *param = ir->actual_parameters.get_head();
+
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ st_src_reg buffer(PROGRAM_MEMORY, 0, GLSL_TYPE_UINT);
+
+ /* Calculate the surface offset */
+ offset->accept(this);
+ st_src_reg off = this->result;
+
+ st_dst_reg dst = undef_dst;
+ if (ir->return_deref) {
+ ir->return_deref->accept(this);
+ dst = st_dst_reg(this->result);
+ dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
+ }
+
+ glsl_to_tgsi_instruction *inst;
+
+ if (!strcmp("__intrinsic_load_shared", callee)) {
+ inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
+ inst->buffer = buffer;
+ } else if (!strcmp("__intrinsic_store_shared", callee)) {
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+ dst.writemask = write_mask->value.u[0];
+
+ dst.type = this->result.type;
+ inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
+ inst->buffer = buffer;
+ } else {
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+
+ st_src_reg data = this->result, data2 = undef_src;
+ unsigned opcode;
+ if (!strcmp("__intrinsic_atomic_add_shared", callee))
+ opcode = TGSI_OPCODE_ATOMUADD;
+ else if (!strcmp("__intrinsic_atomic_min_shared", callee))
+ opcode = TGSI_OPCODE_ATOMIMIN;
+ else if (!strcmp("__intrinsic_atomic_max_shared", callee))
+ opcode = TGSI_OPCODE_ATOMIMAX;
+ else if (!strcmp("__intrinsic_atomic_and_shared", callee))
+ opcode = TGSI_OPCODE_ATOMAND;
+ else if (!strcmp("__intrinsic_atomic_or_shared", callee))
+ opcode = TGSI_OPCODE_ATOMOR;
+ else if (!strcmp("__intrinsic_atomic_xor_shared", callee))
+ opcode = TGSI_OPCODE_ATOMXOR;
+ else if (!strcmp("__intrinsic_atomic_exchange_shared", callee))
+ opcode = TGSI_OPCODE_ATOMXCHG;
+ else if (!strcmp("__intrinsic_atomic_comp_swap_shared", callee)) {
+ opcode = TGSI_OPCODE_ATOMCAS;
+ param = param->get_next();
+ val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+ data2 = this->result;
+ } else {
+ assert(!"Unexpected intrinsic");
+ return;
+ }
+
+ inst = emit_asm(ir, opcode, dst, off, data, data2);
+ inst->buffer = buffer;
+ }
+}
+
+void
+glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
+{
+ const char *callee = ir->callee->function_name();
+ exec_node *param = ir->actual_parameters.get_head();
+
+ ir_dereference *img = (ir_dereference *)param;
+ const ir_variable *imgvar = img->variable_referenced();
+ const glsl_type *type = imgvar->type->without_array();
+ unsigned sampler_array_size = 1, sampler_base = 0;
+
+ st_src_reg reladdr;
+ st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT);
+
+ get_deref_offsets(img, &sampler_array_size, &sampler_base,
+ (unsigned int *)&image.index, &reladdr);
+ if (reladdr.file != PROGRAM_UNDEFINED) {
+ emit_arl(ir, sampler_reladdr, reladdr);
+ image.reladdr = ralloc(mem_ctx, st_src_reg);
+ memcpy(image.reladdr, &sampler_reladdr, sizeof(reladdr));
+ }
+
+ st_dst_reg dst = undef_dst;
+ if (ir->return_deref) {
+ ir->return_deref->accept(this);
+ dst = st_dst_reg(this->result);
+ dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
+ }
+
+ glsl_to_tgsi_instruction *inst;
+
+ if (!strcmp("__intrinsic_image_size", callee)) {
+ dst.writemask = WRITEMASK_XYZ;
+ inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst);
+ } else if (!strcmp("__intrinsic_image_samples", callee)) {
+ st_src_reg res = get_temp(glsl_type::ivec4_type);
+ st_dst_reg dstres = st_dst_reg(res);
+ dstres.writemask = WRITEMASK_W;
+ emit_asm(ir, TGSI_OPCODE_RESQ, dstres);
+ res.swizzle = SWIZZLE_WWWW;
+ inst = emit_asm(ir, TGSI_OPCODE_MOV, dst, res);
+ } else {
+ st_src_reg arg1 = undef_src, arg2 = undef_src;
+ st_src_reg coord;
+ st_dst_reg coord_dst;
+ coord = get_temp(glsl_type::ivec4_type);
+ coord_dst = st_dst_reg(coord);
+ coord_dst.writemask = (1 << type->coordinate_components()) - 1;
+ param = param->get_next();
+ ((ir_dereference *)param)->accept(this);
+ emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+ coord.swizzle = SWIZZLE_XXXX;
+ switch (type->coordinate_components()) {
+ case 4: assert(!"unexpected coord count");
+ /* fallthrough */
+ case 3: coord.swizzle |= SWIZZLE_Z << 6;
+ /* fallthrough */
+ case 2: coord.swizzle |= SWIZZLE_Y << 3;
+ }
+
+ if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
+ param = param->get_next();
+ ((ir_dereference *)param)->accept(this);
+ st_src_reg sample = this->result;
+ sample.swizzle = SWIZZLE_XXXX;
+ coord_dst.writemask = WRITEMASK_W;
+ emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample);
+ coord.swizzle |= SWIZZLE_W << 9;
+ }
+
+ param = param->get_next();
+ if (!param->is_tail_sentinel()) {
+ ((ir_dereference *)param)->accept(this);
+ arg1 = this->result;
+ param = param->get_next();
+ }
+
+ if (!param->is_tail_sentinel()) {
+ ((ir_dereference *)param)->accept(this);
+ arg2 = this->result;
+ param = param->get_next();
+ }
+
+ assert(param->is_tail_sentinel());
+
+ unsigned opcode;
+ if (!strcmp("__intrinsic_image_load", callee))
+ opcode = TGSI_OPCODE_LOAD;
+ else if (!strcmp("__intrinsic_image_store", callee))
+ opcode = TGSI_OPCODE_STORE;
+ else if (!strcmp("__intrinsic_image_atomic_add", callee))
+ opcode = TGSI_OPCODE_ATOMUADD;
+ else if (!strcmp("__intrinsic_image_atomic_min", callee))
+ opcode = TGSI_OPCODE_ATOMIMIN;
+ else if (!strcmp("__intrinsic_image_atomic_max", callee))
+ opcode = TGSI_OPCODE_ATOMIMAX;
+ else if (!strcmp("__intrinsic_image_atomic_and", callee))
+ opcode = TGSI_OPCODE_ATOMAND;
+ else if (!strcmp("__intrinsic_image_atomic_or", callee))
+ opcode = TGSI_OPCODE_ATOMOR;
+ else if (!strcmp("__intrinsic_image_atomic_xor", callee))
+ opcode = TGSI_OPCODE_ATOMXOR;
+ else if (!strcmp("__intrinsic_image_atomic_exchange", callee))
+ opcode = TGSI_OPCODE_ATOMXCHG;
+ else if (!strcmp("__intrinsic_image_atomic_comp_swap", callee))
+ opcode = TGSI_OPCODE_ATOMCAS;
+ else {
+ assert(!"Unexpected intrinsic");
+ return;
+ }
+
+ inst = emit_asm(ir, opcode, dst, coord, arg1, arg2);
+ if (opcode == TGSI_OPCODE_STORE)
+ inst->dst[0].writemask = WRITEMASK_XYZW;
+ }
+
+ inst->buffer = image;
+ inst->sampler_array_size = sampler_array_size;
+ inst->sampler_base = sampler_base;
+
+ switch (type->sampler_dimensionality) {
+ case GLSL_SAMPLER_DIM_1D:
+ inst->tex_target = (type->sampler_array)
+ ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_2D:
+ inst->tex_target = (type->sampler_array)
+ ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_3D:
+ inst->tex_target = TEXTURE_3D_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_CUBE:
+ inst->tex_target = (type->sampler_array)
+ ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_RECT:
+ inst->tex_target = TEXTURE_RECT_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_BUF:
+ inst->tex_target = TEXTURE_BUFFER_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ inst->tex_target = TEXTURE_EXTERNAL_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_MS:
+ inst->tex_target = (type->sampler_array)
+ ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
+ break;
+ default:
+ assert(!"Should not get here.");
+ }
+
+ inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx),
+ _mesa_get_shader_image_format(imgvar->data.image_format));
+}
+
+void
glsl_to_tgsi_visitor::visit(ir_call *ir)
{
glsl_to_tgsi_instruction *call_inst;
@@ -3381,6 +3625,36 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
return;
}
+ if (!strcmp("__intrinsic_load_shared", callee) ||
+ !strcmp("__intrinsic_store_shared", callee) ||
+ !strcmp("__intrinsic_atomic_add_shared", callee) ||
+ !strcmp("__intrinsic_atomic_min_shared", callee) ||
+ !strcmp("__intrinsic_atomic_max_shared", callee) ||
+ !strcmp("__intrinsic_atomic_and_shared", callee) ||
+ !strcmp("__intrinsic_atomic_or_shared", callee) ||
+ !strcmp("__intrinsic_atomic_xor_shared", callee) ||
+ !strcmp("__intrinsic_atomic_exchange_shared", callee) ||
+ !strcmp("__intrinsic_atomic_comp_swap_shared", callee)) {
+ visit_shared_intrinsic(ir);
+ return;
+ }
+
+ if (!strcmp("__intrinsic_image_load", callee) ||
+ !strcmp("__intrinsic_image_store", callee) ||
+ !strcmp("__intrinsic_image_atomic_add", callee) ||
+ !strcmp("__intrinsic_image_atomic_min", callee) ||
+ !strcmp("__intrinsic_image_atomic_max", callee) ||
+ !strcmp("__intrinsic_image_atomic_and", callee) ||
+ !strcmp("__intrinsic_image_atomic_or", callee) ||
+ !strcmp("__intrinsic_image_atomic_xor", callee) ||
+ !strcmp("__intrinsic_image_atomic_exchange", callee) ||
+ !strcmp("__intrinsic_image_atomic_comp_swap", callee) ||
+ !strcmp("__intrinsic_image_size", callee) ||
+ !strcmp("__intrinsic_image_samples", callee)) {
+ visit_image_intrinsic(ir);
+ return;
+ }
+
entry = get_function_signature(sig);
/* Process in parameters. */
foreach_two_lists(formal_node, &sig->parameters,
@@ -3980,6 +4254,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
num_address_regs = 0;
samplers_used = 0;
buffers_used = 0;
+ images_used = 0;
indirect_addr_consts = false;
wpos_transform_const = -1;
glsl_version = 0;
@@ -3992,6 +4267,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
options = NULL;
have_sqrt = false;
have_fma = false;
+ use_shared_memory = false;
}
glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
@@ -4015,6 +4291,7 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
{
v->samplers_used = 0;
v->buffers_used = 0;
+ v->images_used = 0;
foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
if (inst->info->is_tex) {
@@ -4035,8 +4312,20 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
if (inst->buffer.file != PROGRAM_UNDEFINED && (
is_resource_instruction(inst->op) ||
inst->op == TGSI_OPCODE_STORE)) {
- if (inst->buffer.file == PROGRAM_BUFFER)
+ if (inst->buffer.file == PROGRAM_BUFFER) {
v->buffers_used |= 1 << inst->buffer.index;
+ } else if (inst->buffer.file == PROGRAM_MEMORY) {
+ v->use_shared_memory = true;
+ } else {
+ assert(inst->buffer.file == PROGRAM_IMAGE);
+ for (int i = 0; i < inst->sampler_array_size; i++) {
+ unsigned idx = inst->sampler_base + i;
+ v->images_used |= 1 << idx;
+ v->image_targets[idx] =
+ st_translate_texture_target(inst->tex_target, false);
+ v->image_formats[idx] = inst->image_format;
+ }
+ }
}
}
prog->SamplersUsed = v->samplers_used;
@@ -4819,7 +5108,9 @@ struct st_translate {
struct ureg_dst address[3];
struct ureg_src samplers[PIPE_MAX_SAMPLERS];
struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS];
+ struct ureg_src images[PIPE_MAX_SHADER_IMAGES];
struct ureg_src systemValues[SYSTEM_VALUE_MAX];
+ struct ureg_src shared_memory;
struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
unsigned *array_sizes;
struct array_decl *input_arrays;
@@ -4880,6 +5171,12 @@ const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
TGSI_SEMANTIC_PRIMID,
TGSI_SEMANTIC_TESSOUTER,
TGSI_SEMANTIC_TESSINNER,
+
+ /* Compute shaders
+ */
+ TGSI_SEMANTIC_THREAD_ID,
+ TGSI_SEMANTIC_BLOCK_ID,
+ TGSI_SEMANTIC_GRID_SIZE,
};
/**
@@ -5308,7 +5605,12 @@ compile_tgsi_instruction(struct st_translate *t,
for (i = num_src - 1; i >= 0; i--)
src[i + 1] = src[i];
num_src++;
- src[0] = t->buffers[inst->buffer.index];
+ if (inst->buffer.file == PROGRAM_MEMORY)
+ src[0] = t->shared_memory;
+ else if (inst->buffer.file == PROGRAM_BUFFER)
+ src[0] = t->buffers[inst->buffer.index];
+ else
+ src[0] = t->images[inst->buffer.index];
if (inst->buffer.reladdr)
src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2]));
assert(src[0].File != TGSI_FILE_NULL);
@@ -5317,7 +5619,13 @@ compile_tgsi_instruction(struct st_translate *t,
break;
case TGSI_OPCODE_STORE:
- dst[0] = ureg_writemask(ureg_dst(t->buffers[inst->buffer.index]), inst->dst[0].writemask);
+ if (inst->buffer.file == PROGRAM_MEMORY)
+ dst[0] = ureg_dst(t->shared_memory);
+ else if (inst->buffer.file == PROGRAM_BUFFER)
+ dst[0] = ureg_dst(t->buffers[inst->buffer.index]);
+ else
+ dst[0] = ureg_dst(t->images[inst->buffer.index]);
+ dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask);
if (inst->buffer.reladdr)
dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2]));
assert(dst[0].File != TGSI_FILE_NULL);
@@ -5643,6 +5951,12 @@ st_translate_program(
TGSI_SEMANTIC_TESSCOORD);
assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_HELPER_INVOCATION] ==
TGSI_SEMANTIC_HELPER_INVOCATION);
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_LOCAL_INVOCATION_ID] ==
+ TGSI_SEMANTIC_THREAD_ID);
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_WORK_GROUP_ID] ==
+ TGSI_SEMANTIC_BLOCK_ID);
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_NUM_WORK_GROUPS] ==
+ TGSI_SEMANTIC_GRID_SIZE);
t = CALLOC_STRUCT(st_translate);
if (!t) {
@@ -5710,6 +6024,8 @@ st_translate_program(
t->inputs[i] = ureg_DECL_vs_input(ureg, i);
}
break;
+ case TGSI_PROCESSOR_COMPUTE:
+ break;
default:
assert(0);
}
@@ -5719,6 +6035,7 @@ st_translate_program(
*/
switch (procType) {
case TGSI_PROCESSOR_FRAGMENT:
+ case TGSI_PROCESSOR_COMPUTE:
break;
case TGSI_PROCESSOR_GEOMETRY:
case TGSI_PROCESSOR_TESS_EVAL:
@@ -5969,7 +6286,17 @@ st_translate_program(
}
}
+ if (program->use_shared_memory)
+ t->shared_memory = ureg_DECL_shared_memory(ureg);
+ for (i = 0; i < program->shader->NumImages; i++) {
+ if (program->images_used & (1 << i)) {
+ t->images[i] = ureg_DECL_image(ureg, i,
+ program->image_targets[i],
+ program->image_formats[i],
+ true, false);
+ }
+ }
/* Emit each instruction in turn:
*/
@@ -6188,6 +6515,7 @@ get_mesa_program(struct gl_context *ctx,
struct st_geometry_program *stgp;
struct st_tessctrl_program *sttcp;
struct st_tesseval_program *sttep;
+ struct st_compute_program *stcp;
switch (shader->Type) {
case GL_VERTEX_SHADER:
@@ -6210,6 +6538,10 @@ get_mesa_program(struct gl_context *ctx,
sttep = (struct st_tesseval_program *)prog;
sttep->glsl_to_tgsi = v;
break;
+ case GL_COMPUTE_SHADER:
+ stcp = (struct st_compute_program *)prog;
+ stcp->glsl_to_tgsi = v;
+ break;
default:
assert(!"should not be reached");
return NULL;
diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index 84b65369d80..a983d64b5d5 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -906,7 +906,6 @@ st_manager_add_color_renderbuffer(struct st_context *st,
break;
default:
return FALSE;
- break;
}
if (!st_framebuffer_add_renderbuffer(stfb, idx))
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 5bd626f8610..2e21d02b8b5 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -158,6 +158,9 @@ delete_basic_variant(struct st_context *st, struct st_basic_variant *v,
case GL_GEOMETRY_PROGRAM_NV:
cso_delete_geometry_shader(st->cso_context, v->driver_shader);
break;
+ case GL_COMPUTE_PROGRAM_NV:
+ cso_delete_compute_shader(st->cso_context, v->driver_shader);
+ break;
default:
assert(!"this shouldn't occur");
}
@@ -193,6 +196,30 @@ st_release_basic_variants(struct st_context *st, GLenum target,
/**
+ * Free all variants of a compute program.
+ */
+void
+st_release_cp_variants(struct st_context *st, struct st_compute_program *stcp)
+{
+ struct st_basic_variant **variants = &stcp->variants;
+ struct st_basic_variant *v;
+
+ for (v = *variants; v; ) {
+ struct st_basic_variant *next = v->next;
+ delete_basic_variant(st, v, stcp->Base.Base.Target);
+ v = next;
+ }
+
+ *variants = NULL;
+
+ if (stcp->tgsi.prog) {
+ ureg_free_tokens(stcp->tgsi.prog);
+ stcp->tgsi.prog = NULL;
+ }
+}
+
+
+/**
* Translate a vertex program.
*/
bool
@@ -1395,6 +1422,74 @@ st_translate_tesseval_program(struct st_context *st,
/**
+ * Translate a compute program to create a new variant.
+ */
+bool
+st_translate_compute_program(struct st_context *st,
+ struct st_compute_program *stcp)
+{
+ struct ureg_program *ureg;
+ struct pipe_shader_state prog;
+
+ ureg = ureg_create_with_screen(TGSI_PROCESSOR_COMPUTE, st->pipe->screen);
+ if (ureg == NULL)
+ return false;
+
+ st_translate_program_common(st, &stcp->Base.Base, stcp->glsl_to_tgsi, ureg,
+ TGSI_PROCESSOR_COMPUTE, &prog);
+
+ stcp->tgsi.prog = prog.tokens;
+ stcp->tgsi.req_local_mem = stcp->Base.SharedSize;
+ stcp->tgsi.req_private_mem = 0;
+ stcp->tgsi.req_input_mem = 0;
+
+ free_glsl_to_tgsi_visitor(stcp->glsl_to_tgsi);
+ stcp->glsl_to_tgsi = NULL;
+ return true;
+}
+
+
+/**
+ * Get/create compute program variant.
+ */
+struct st_basic_variant *
+st_get_cp_variant(struct st_context *st,
+ struct pipe_compute_state *tgsi,
+ struct st_basic_variant **variants)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct st_basic_variant *v;
+ struct st_basic_variant_key key;
+
+ memset(&key, 0, sizeof(key));
+ key.st = st->has_shareable_shaders ? NULL : st;
+
+ /* Search for existing variant */
+ for (v = *variants; v; v = v->next) {
+ if (memcmp(&v->key, &key, sizeof(key)) == 0) {
+ break;
+ }
+ }
+
+ if (!v) {
+ /* create new */
+ v = CALLOC_STRUCT(st_basic_variant);
+ if (v) {
+ /* fill in new variant */
+ v->driver_shader = pipe->create_compute_state(pipe, tgsi);
+ v->key = key;
+
+ /* insert into list */
+ v->next = *variants;
+ *variants = v;
+ }
+ }
+
+ return v;
+}
+
+
+/**
* Vert/Geom/Frag programs have per-context variants. Free all the
* variants attached to the given program which match the given context.
*/
@@ -1449,14 +1544,17 @@ destroy_program_variants(struct st_context *st, struct gl_program *target)
case GL_GEOMETRY_PROGRAM_NV:
case GL_TESS_CONTROL_PROGRAM_NV:
case GL_TESS_EVALUATION_PROGRAM_NV:
+ case GL_COMPUTE_PROGRAM_NV:
{
struct st_geometry_program *gp = (struct st_geometry_program*)target;
struct st_tessctrl_program *tcp = (struct st_tessctrl_program*)target;
struct st_tesseval_program *tep = (struct st_tesseval_program*)target;
+ struct st_compute_program *cp = (struct st_compute_program*)target;
struct st_basic_variant **variants =
target->Target == GL_GEOMETRY_PROGRAM_NV ? &gp->variants :
target->Target == GL_TESS_CONTROL_PROGRAM_NV ? &tcp->variants :
target->Target == GL_TESS_EVALUATION_PROGRAM_NV ? &tep->variants :
+ target->Target == GL_COMPUTE_PROGRAM_NV ? &cp->variants :
NULL;
struct st_basic_variant *v, **prevPtr = variants;
@@ -1513,6 +1611,7 @@ destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
case GL_GEOMETRY_SHADER:
case GL_TESS_CONTROL_SHADER:
case GL_TESS_EVALUATION_SHADER:
+ case GL_COMPUTE_SHADER:
{
destroy_program_variants(st, shader->Program);
}
@@ -1629,6 +1728,12 @@ st_precompile_shader_variant(struct st_context *st,
break;
}
+ case GL_COMPUTE_PROGRAM_NV: {
+ struct st_compute_program *p = (struct st_compute_program *)prog;
+ st_get_cp_variant(st, &p->tgsi, &p->variants);
+ break;
+ }
+
default:
assert(0);
}
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index 74f3def6095..028fba99a74 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -231,6 +231,18 @@ struct st_tesseval_program
};
+/**
+ * Derived from Mesa gl_compute_program:
+ */
+struct st_compute_program
+{
+ struct gl_compute_program Base; /**< The Mesa compute program */
+ struct pipe_compute_state tgsi;
+ struct glsl_to_tgsi_visitor* glsl_to_tgsi;
+
+ struct st_basic_variant *variants;
+};
+
static inline struct st_fragment_program *
st_fragment_program( struct gl_fragment_program *fp )
@@ -263,6 +275,12 @@ st_tesseval_program( struct gl_tess_eval_program *tep )
return (struct st_tesseval_program *)tep;
}
+static inline struct st_compute_program *
+st_compute_program( struct gl_compute_program *cp )
+{
+ return (struct st_compute_program *)cp;
+}
+
static inline void
st_reference_vertprog(struct st_context *st,
struct st_vertex_program **ptr,
@@ -313,6 +331,16 @@ st_reference_tesseprog(struct st_context *st,
(struct gl_program *) prog);
}
+static inline void
+st_reference_compprog(struct st_context *st,
+ struct st_compute_program **ptr,
+ struct st_compute_program *prog)
+{
+ _mesa_reference_program(st->ctx,
+ (struct gl_program **) ptr,
+ (struct gl_program *) prog);
+}
+
/**
* This defines mapping from Mesa VARYING_SLOTs to TGSI GENERIC slots.
*/
@@ -351,6 +379,11 @@ st_get_fp_variant(struct st_context *st,
const struct st_fp_variant_key *key);
extern struct st_basic_variant *
+st_get_cp_variant(struct st_context *st,
+ struct pipe_compute_state *tgsi,
+ struct st_basic_variant **variants);
+
+extern struct st_basic_variant *
st_get_basic_variant(struct st_context *st,
unsigned pipe_shader,
struct pipe_shader_state *tgsi,
@@ -365,6 +398,10 @@ st_release_fp_variants( struct st_context *st,
struct st_fragment_program *stfp );
extern void
+st_release_cp_variants(struct st_context *st,
+ struct st_compute_program *stcp);
+
+extern void
st_release_basic_variants(struct st_context *st, GLenum target,
struct st_basic_variant **variants,
struct pipe_shader_state *tgsi);
@@ -392,6 +429,10 @@ extern bool
st_translate_tesseval_program(struct st_context *st,
struct st_tesseval_program *sttep);
+extern bool
+st_translate_compute_program(struct st_context *st,
+ struct st_compute_program *stcp);
+
extern void
st_print_current_vertex_program(void);