author     Jason Ekstrand <[email protected]>    2015-06-23 18:05:25 -0700
committer  Jason Ekstrand <[email protected]>    2015-06-23 18:05:25 -0700
commit     a62edcce4eb4c800d972817a20ee874bf2a2c3ef (patch)
tree       46083a8762d45a6c595c7aaee2bb1cd0fc36eb62 /src/mesa
parent     9b9f973ca6d3cc1ec5be27857def00a83c032464 (diff)
parent     6844d6b7f8398a25eff511541b187afeb1199ce0 (diff)
Merge remote-tracking branch 'mesa-public/master' into vulkan
Diffstat (limited to 'src/mesa')
239 files changed, 8516 insertions, 7976 deletions
diff --git a/src/mesa/Android.gen.mk b/src/mesa/Android.gen.mk index cc979547e0a..145f2594cda 100644 --- a/src/mesa/Android.gen.mk +++ b/src/mesa/Android.gen.mk @@ -115,9 +115,11 @@ $(intermediates)/main/api_exec.c: $(dispatch_deps) GET_HASH_GEN := $(LOCAL_PATH)/main/get_hash_generator.py +$(intermediates)/main/get_hash.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(GET_HASH_GEN) +$(intermediates)/main/get_hash.h: PRIVATE_XML := -f $(glapi)/gl_and_es_API.xml $(intermediates)/main/get_hash.h: $(glapi)/gl_and_es_API.xml \ $(LOCAL_PATH)/main/get_hash_params.py $(GET_HASH_GEN) - @$(MESA_PYTHON2) $(GET_HASH_GEN) -f $< > $@ + $(call es-gen) FORMAT_INFO := $(LOCAL_PATH)/main/format_info.py format_info_deps := \ @@ -125,8 +127,10 @@ format_info_deps := \ $(LOCAL_PATH)/main/format_parser.py \ $(FORMAT_INFO) +$(intermediates)/main/format_info.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(FORMAT_INFO) +$(intermediates)/main/format_info.h: PRIVATE_XML := $(intermediates)/main/format_info.h: $(format_info_deps) - @$(MESA_PYTHON2) $(FORMAT_INFO) $< > $@ + $(call es-gen, $<) FORMAT_PACK := $(LOCAL_PATH)/main/format_pack.py format_pack_deps := \ @@ -134,8 +138,10 @@ format_pack_deps := \ $(LOCAL_PATH)/main/format_parser.py \ $(FORMAT_PACK) +$(intermediates)/main/format_pack.c: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(FORMAT_PACK) +$(intermediates)/main/format_pack.c: PRIVATE_XML := $(intermediates)/main/format_pack.c: $(format_pack_deps) - $(hide) $(MESA_PYTHON2) $(FORMAT_PACK) $< > $@ + $(call es-gen, $<) FORMAT_UNPACK := $(LOCAL_PATH)/main/format_unpack.py format_unpack_deps := \ @@ -143,5 +149,7 @@ format_unpack_deps := \ $(LOCAL_PATH)/main/format_parser.py \ $(FORMAT_UNPACK) +$(intermediates)/main/format_unpack.c: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(FORMAT_UNPACK) +$(intermediates)/main/format_unpack.c: PRIVATE_XML := $(intermediates)/main/format_unpack.c: $(format_unpack_deps) - $(hide) $(MESA_PYTHON2) $(FORMAT_UNPACK) $< > $@ + $(call es-gen, $<) diff --git a/src/mesa/Android.libmesa_glsl_utils.mk b/src/mesa/Android.libmesa_glsl_utils.mk index 3497377af8c..ed620ac648c 100644 --- a/src/mesa/Android.libmesa_glsl_utils.mk +++ b/src/mesa/Android.libmesa_glsl_utils.mk @@ -44,7 +44,8 @@ LOCAL_C_INCLUDES := \ LOCAL_SRC_FILES := \ main/imports.c \ program/prog_hash_table.c \ - program/symbol_table.c + program/symbol_table.c \ + program/dummy_errors.c include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -68,7 +69,8 @@ LOCAL_C_INCLUDES := \ LOCAL_SRC_FILES := \ main/imports.c \ program/prog_hash_table.c \ - program/symbol_table.c + program/symbol_table.c \ + program/dummy_errors.c include $(MESA_COMMON_MK) include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am index 60114e4f66a..71794b5dada 100644 --- a/src/mesa/Makefile.am +++ b/src/mesa/Makefile.am @@ -60,7 +60,6 @@ main/git_sha1.h: main/git_sha1.h.tmp include Makefile.sources EXTRA_DIST = \ - drivers/haiku \ drivers/SConscript \ main/format_info.py \ main/format_pack.py \ diff --git a/src/mesa/drivers/SConscript b/src/mesa/drivers/SConscript index db656780c0b..5d654f538be 100644 --- a/src/mesa/drivers/SConscript +++ b/src/mesa/drivers/SConscript @@ -8,6 +8,3 @@ if env['dri']: 'dri/common/xmlpool/SConscript', 'dri/common/SConscript', ]) - -if env['platform'] == 'haiku': - SConscript('haiku/swrast/SConscript') diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 0d094ddf4e6..71c1a763912 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ 
-172,7 +172,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->UnmapRenderbuffer = _swrast_unmap_soft_renderbuffer; driver->RenderTexture = _swrast_render_texture; driver->FinishRenderTexture = _swrast_finish_render_texture; - driver->FramebufferRenderbuffer = _mesa_framebuffer_renderbuffer; + driver->FramebufferRenderbuffer = _mesa_FramebufferRenderbuffer_sw; driver->ValidateFramebuffer = _mesa_validate_framebuffer; driver->BlitFramebuffer = _swrast_BlitFramebuffer; diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index d2ab7b8ded9..214a68a9129 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -1211,7 +1211,8 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_BindRenderbuffer(GL_RENDERBUFFER, save->RenderbufferName); if (state & MESA_META_DRAW_BUFFERS) { - _mesa_drawbuffers(ctx, ctx->Const.MaxDrawBuffers, save->ColorDrawBuffers, NULL); + _mesa_drawbuffers(ctx, ctx->DrawBuffer, ctx->Const.MaxDrawBuffers, + save->ColorDrawBuffers, NULL); } ctx->Meta->SaveStackDepth--; diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c index bb2164276b2..9cace2b245a 100644 --- a/src/mesa/drivers/common/meta_blit.c +++ b/src/mesa/drivers/common/meta_blit.c @@ -82,7 +82,7 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx, y_scale = samples * 0.5; /* We expect only power of 2 samples in source multisample buffer. */ - assert(samples > 0 && (samples & (samples - 1)) == 0); + assert(samples > 0 && is_power_of_two(samples)); while (samples >> (shader_offset + 1)) { shader_offset++; } @@ -263,7 +263,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx, } /* We expect only power of 2 samples in source multisample buffer. */ - assert(samples > 0 && (samples & (samples - 1)) == 0); + assert(samples > 0 && is_power_of_two(samples)); while (samples >> (shader_offset + 1)) { shader_offset++; } @@ -434,7 +434,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx, * (so the floating point exponent just gets increased), rather than * doing a naive sum and dividing. */ - assert((samples & (samples - 1)) == 0); + assert(is_power_of_two(samples)); /* Fetch each individual sample. 
*/ sample_resolve = rzalloc_size(mem_ctx, 1); for (i = 0; i < samples; i++) { diff --git a/src/mesa/drivers/common/meta_tex_subimage.c b/src/mesa/drivers/common/meta_tex_subimage.c index ad6e7873ecd..d2474f52718 100644 --- a/src/mesa/drivers/common/meta_tex_subimage.c +++ b/src/mesa/drivers/common/meta_tex_subimage.c @@ -34,6 +34,7 @@ #include "macros.h" #include "meta.h" #include "pbo.h" +#include "readpix.h" #include "shaderapi.h" #include "state.h" #include "teximage.h" @@ -150,7 +151,8 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims, bool success = false; int z; - if (!_mesa_is_bufferobj(packing->BufferObj) && !create_pbo) + if (!_mesa_is_bufferobj(packing->BufferObj) && + (!create_pbo || pixels == NULL)) return false; if (format == GL_DEPTH_COMPONENT || @@ -257,6 +259,7 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, GLuint pbo = 0, pbo_tex = 0, fbos[2] = { 0, 0 }; int full_height, image_height; struct gl_texture_image *pbo_tex_image; + struct gl_renderbuffer *rb = NULL; GLenum status; bool success = false; int z; @@ -273,6 +276,13 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, if (ctx->_ImageTransferState) return false; + + if (!tex_image) { + rb = ctx->ReadBuffer->_ColorReadBuffer; + if (_mesa_need_rgb_to_luminance_conversion(rb->Format, format)) + return false; + } + /* For arrays, use a tall (height * depth) 2D texture but taking into * account the inter-image padding specified with the image height packing * property. diff --git a/src/mesa/drivers/dri/Makefile.am b/src/mesa/drivers/dri/Makefile.am index fa1de103b56..08a8e645521 100644 --- a/src/mesa/drivers/dri/Makefile.am +++ b/src/mesa/drivers/dri/Makefile.am @@ -60,6 +60,7 @@ mesa_dri_drivers_la_LIBADD = \ ../../libmesa.la \ common/libmegadriver_stub.la \ common/libdricommon.la \ + common/libxmlconfig.la \ $(MEGADRIVERS_DEPS) \ $(DRI_LIB_DEPS) \ $() diff --git a/src/mesa/drivers/dri/common/Android.mk b/src/mesa/drivers/dri/common/Android.mk index a7fcd6d572a..6986f5e8cb4 100644 --- a/src/mesa/drivers/dri/common/Android.mk +++ b/src/mesa/drivers/dri/common/Android.mk @@ -39,7 +39,9 @@ intermediates := $(call local-generated-sources-dir) LOCAL_C_INCLUDES := \ $(MESA_DRI_C_INCLUDES) -LOCAL_EXPORT_C_INCLUDE_DIRS := $(intermediates) +LOCAL_EXPORT_C_INCLUDE_DIRS := \ + $(LOCAL_PATH) \ + $(intermediates) # swrast only ifeq ($(MESA_GPU_DRIVERS),swrast) @@ -48,7 +50,9 @@ else LOCAL_SHARED_LIBRARIES := libdrm endif -LOCAL_SRC_FILES := $(DRI_COMMON_FILES) +LOCAL_SRC_FILES := \ + $(DRI_COMMON_FILES) \ + $(XMLCONFIG_FILES) MESA_DRI_OPTIONS_H := $(intermediates)/xmlpool/options.h LOCAL_GENERATED_SOURCES := $(MESA_DRI_OPTIONS_H) diff --git a/src/mesa/drivers/dri/common/Makefile.am b/src/mesa/drivers/dri/common/Makefile.am index da8f97a980e..ae19fcb3565 100644 --- a/src/mesa/drivers/dri/common/Makefile.am +++ b/src/mesa/drivers/dri/common/Makefile.am @@ -33,16 +33,20 @@ AM_CFLAGS = \ -I$(top_srcdir)/src/gallium/include \ -I$(top_srcdir)/src/gallium/auxiliary \ $(DEFINES) \ - $(EXPAT_CFLAGS) \ $(VISIBILITY_CFLAGS) noinst_LTLIBRARIES = \ libdricommon.la \ + libxmlconfig.la \ libmegadriver_stub.la \ libdri_test_stubs.la libdricommon_la_SOURCES = $(DRI_COMMON_FILES) +libxmlconfig_la_SOURCES = $(XMLCONFIG_FILES) +libxmlconfig_la_CFLAGS = $(AM_CFLAGS) $(EXPAT_CFLAGS) +libxmlconfig_la_LIBADD = $(EXPAT_LIBS) -lm + libdri_test_stubs_la_SOURCES = $(test_stubs_FILES) libdri_test_stubs_la_CFLAGS = $(AM_CFLAGS) -DNO_MAIN diff --git a/src/mesa/drivers/dri/common/Makefile.sources 
b/src/mesa/drivers/dri/common/Makefile.sources index d00ec5f7334..d5d8da8fcee 100644 --- a/src/mesa/drivers/dri/common/Makefile.sources +++ b/src/mesa/drivers/dri/common/Makefile.sources @@ -2,7 +2,9 @@ DRI_COMMON_FILES := \ utils.c \ utils.h \ dri_util.c \ - dri_util.h \ + dri_util.h + +XMLCONFIG_FILES := \ xmlconfig.c \ xmlconfig.h diff --git a/src/mesa/drivers/dri/common/SConscript b/src/mesa/drivers/dri/common/SConscript index 0bee1b41fc6..b402736db69 100644 --- a/src/mesa/drivers/dri/common/SConscript +++ b/src/mesa/drivers/dri/common/SConscript @@ -37,7 +37,7 @@ drienv.PkgUseModules('DRM') # else #env.Append(CPPDEFINES = ['__NOT_HAVE_DRM_H']) -sources = drienv.ParseSourceList('Makefile.sources', 'DRI_COMMON_FILES') +sources = drienv.ParseSourceList('Makefile.sources', ['DRI_COMMON_FILES', 'XMLCONFIG_FILES' ]) dri_common = drienv.ConvenienceLibrary( target = 'dri_common', diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index d6e875fcfeb..e7ababe0b67 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -162,13 +162,21 @@ driCreateNewScreen2(int scrn, int fd, return NULL; } - int gl_version_override = _mesa_get_gl_version_override(); - if (gl_version_override >= 31) { - psp->max_gl_core_version = MAX2(psp->max_gl_core_version, - gl_version_override); - } else { - psp->max_gl_compat_version = MAX2(psp->max_gl_compat_version, - gl_version_override); + struct gl_constants consts = { 0 }; + gl_api api; + unsigned version; + + api = API_OPENGLES2; + if (_mesa_override_gl_version_contextless(&consts, &api, &version)) + psp->max_gl_es2_version = version; + + api = API_OPENGL_COMPAT; + if (_mesa_override_gl_version_contextless(&consts, &api, &version)) { + if (api == API_OPENGL_CORE) { + psp->max_gl_core_version = version; + } else { + psp->max_gl_compat_version = version; + } } psp->api_mask = (1 << __DRI_API_OPENGL); diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 91da977acee..8ed8ff555ba 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -730,9 +730,9 @@ i830_update_draw_buffer(struct intel_context *intel) */ if (ctx->NewState & _NEW_BUFFERS) { /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ - _mesa_update_framebuffer(ctx); + _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); /* this updates the DrawBuffer's Width/Height if it's a FBO */ - _mesa_update_draw_buffer_bounds(ctx); + _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); } if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) { diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 9b002236add..03c32e56d82 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -220,7 +220,7 @@ get_result_flags(const struct prog_instruction *inst) { GLuint flags = 0; - if (inst->SaturateMode == SATURATE_ZERO_ONE) + if (inst->Saturate) flags |= A0_DEST_SATURATE; if (inst->DstReg.WriteMask & WRITEMASK_X) flags |= A0_DEST_CHANNEL_X; diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 97bf81ed759..80bd249fa7b 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -732,9 +732,9 @@ i915_update_draw_buffer(struct intel_context *intel) */ if (ctx->NewState & _NEW_BUFFERS) { /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ - 
_mesa_update_framebuffer(ctx); + _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); /* this updates the DrawBuffer's Width/Height if it's a FBO */ - _mesa_update_draw_buffer_bounds(ctx); + _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); } if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) { diff --git a/src/mesa/drivers/dri/i915/intel_fbo.c b/src/mesa/drivers/dri/i915/intel_fbo.c index 24c318049c4..a5d5c5832fb 100644 --- a/src/mesa/drivers/dri/i915/intel_fbo.c +++ b/src/mesa/drivers/dri/i915/intel_fbo.c @@ -427,7 +427,7 @@ intel_framebuffer_renderbuffer(struct gl_context * ctx, { DBG("Intel FramebufferRenderbuffer %u %u\n", fb->Name, rb ? rb->Name : 0); - _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb); + _mesa_FramebufferRenderbuffer_sw(ctx, fb, attachment, rb); intel_draw_buffer(ctx); } diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index cf2424e34b4..9c947be88a0 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -48,6 +48,7 @@ libi965_dri_la_LIBADD = $(INTEL_LIBS) TEST_LIBS = \ libi965_dri.la \ ../common/libdricommon.la \ + ../common/libxmlconfig.la \ ../common/libmegadriver_stub.la \ ../../../libmesa.la \ $(DRI_LIB_DEPS) \ diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 1ae93e1d5f3..981fe79b132 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -18,9 +18,11 @@ i965_FILES = \ brw_clip_unfilled.c \ brw_clip_util.c \ brw_compute.c \ + brw_conditional_render.c \ brw_context.c \ brw_context.h \ brw_cs.cpp \ + brw_cs.h \ brw_cubemap_normalize.cpp \ brw_curbe.c \ brw_dead_control_flow.cpp \ @@ -40,6 +42,7 @@ i965_FILES = \ brw_ff_gs.c \ brw_ff_gs_emit.c \ brw_ff_gs.h \ + brw_fs_builder.h \ brw_fs_channel_expressions.cpp \ brw_fs_cmod_propagation.cpp \ brw_fs_combine_constants.cpp \ @@ -47,7 +50,6 @@ i965_FILES = \ brw_fs.cpp \ brw_fs_cse.cpp \ brw_fs_dead_code_eliminate.cpp \ - brw_fs_fp.cpp \ brw_fs_generator.cpp \ brw_fs.h \ brw_fs_live_variables.cpp \ @@ -128,6 +130,7 @@ i965_FILES = \ brw_vs.h \ brw_vs_state.c \ brw_vs_surface_state.c \ + brw_vue_map.c \ brw_wm.c \ brw_wm.h \ brw_wm_iz.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index c1b760920d9..789520c7353 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -29,7 +29,8 @@ brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw, bool debug_flag) : mem_ctx(ralloc_context(NULL)), - generator(brw, mem_ctx, (void *) rzalloc(mem_ctx, struct brw_wm_prog_key), + generator(brw->intelScreen->compiler, brw, + mem_ctx, (void *) rzalloc(mem_ctx, struct brw_wm_prog_key), (struct brw_stage_prog_data *) rzalloc(mem_ctx, struct brw_wm_prog_data), NULL, 0, false, "BLORP") { diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp index 7e7770e43cd..f1f230e3751 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.cpp +++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp @@ -141,12 +141,12 @@ bblock_t::combine_with(bblock_t *that) } void -bblock_t::dump(backend_visitor *v) const +bblock_t::dump(backend_shader *s) const { int ip = this->start_ip; foreach_inst_in_block(backend_instruction, inst, this) { fprintf(stderr, "%5d: ", ip); - v->dump_instruction(inst); + s->dump_instruction(inst); ip++; } } @@ -231,6 +231,7 @@ cfg_t::cfg_t(exec_list *instructions) if 
(cur_else) { cur_else->add_successor(mem_ctx, cur_endif); } else { + assert(cur_if != NULL); cur_if->add_successor(mem_ctx, cur_endif); } @@ -299,6 +300,7 @@ cfg_t::cfg_t(exec_list *instructions) inst->exec_node::remove(); cur->instructions.push_tail(inst); + assert(cur_do != NULL && cur_while != NULL); cur->add_successor(mem_ctx, cur_do); set_next_block(&cur, cur_while, ip); @@ -411,7 +413,7 @@ cfg_t::make_block_array() } void -cfg_t::dump(backend_visitor *v) +cfg_t::dump(backend_shader *s) { if (idom_dirty) calculate_idom(); @@ -423,8 +425,8 @@ cfg_t::dump(backend_visitor *v) link->block->num); } fprintf(stderr, "\n"); - if (v != NULL) - block->dump(v); + if (s != NULL) + block->dump(s); fprintf(stderr, "END B%d", block->num); foreach_list_typed(bblock_link, link, link, &block->children) { fprintf(stderr, " ->B%d", diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h index 56d7d07abdf..a09491781e6 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.h +++ b/src/mesa/drivers/dri/i965/brw_cfg.h @@ -60,7 +60,7 @@ struct bblock_t { bool is_successor_of(const bblock_t *block) const; bool can_combine_with(const bblock_t *that) const; void combine_with(bblock_t *that); - void dump(backend_visitor *v) const; + void dump(backend_shader *s) const; backend_instruction *start(); const backend_instruction *start() const; @@ -273,7 +273,7 @@ struct cfg_t { void calculate_idom(); static bblock_t *intersect(bblock_t *b1, bblock_t *b2); - void dump(backend_visitor *v); + void dump(backend_shader *s); void dump_cfg(); void dump_domtree(); #endif diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c index 12314204803..1d4ba3cac7e 100644 --- a/src/mesa/drivers/dri/i965/brw_clear.c +++ b/src/mesa/drivers/dri/i965/brw_clear.c @@ -121,8 +121,9 @@ brw_fast_clear_depth(struct gl_context *ctx) * first. */ if ((ctx->Scissor.EnableFlags & 1) && !noop_scissor(ctx, fb)) { - perf_debug("Failed to fast clear depth due to scissor being enabled. " - "Possible 5%% performance win if avoided.\n"); + perf_debug("Failed to fast clear %dx%d depth because of scissors. 
" + "Possible 5%% performance win if avoided.\n", + mt->logical_width0, mt->logical_height0); return false; } diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 32238341aae..dee74dba8af 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -32,6 +32,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "main/framebuffer.h" static void upload_clip_vp(struct brw_context *brw) @@ -59,7 +60,9 @@ brw_upload_clip_unit(struct brw_context *brw) struct brw_clip_unit_state *clip; /* _NEW_BUFFERS */ - struct gl_framebuffer *fb = ctx->DrawBuffer; + const struct gl_framebuffer *fb = ctx->DrawBuffer; + const float fb_width = (float)_mesa_geometric_width(fb); + const float fb_height = (float)_mesa_geometric_height(fb); upload_clip_vp(brw); @@ -127,8 +130,8 @@ brw_upload_clip_unit(struct brw_context *brw) /* enable guardband clipping if we can */ if (ctx->ViewportArray[0].X == 0 && ctx->ViewportArray[0].Y == 0 && - ctx->ViewportArray[0].Width == (float) fb->Width && - ctx->ViewportArray[0].Height == (float) fb->Height) + ctx->ViewportArray[0].Width == fb_width && + ctx->ViewportArray[0].Height == fb_height) { clip->clip5.guard_band_enable = 1; clip->clip6.clipper_viewport_state_ptr = diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c index b3d6de51adc..5693ab507d4 100644 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ b/src/mesa/drivers/dri/i965/brw_compute.c @@ -45,7 +45,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups) unsigned thread_width_max = (group_size + simd_size - 1) / simd_size; - uint32_t right_mask = (1u << simd_size) - 1; + uint32_t right_mask = 0xffffffffu >> (32 - simd_size); const unsigned right_non_aligned = group_size & (simd_size - 1); if (right_non_aligned != 0) right_mask >>= (simd_size - right_non_aligned); diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c b/src/mesa/drivers/dri/i965/brw_conditional_render.c new file mode 100644 index 00000000000..6d37c3b6928 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_conditional_render.c @@ -0,0 +1,161 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Neil Roberts <[email protected]> + */ + +/** @file brw_conditional_render.c + * + * Support for conditional rendering based on query objects + * (GL_NV_conditional_render, GL_ARB_conditional_render_inverted) on Gen7+. + */ + +#include "main/imports.h" +#include "main/condrender.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +set_predicate_enable(struct brw_context *brw, + bool value) +{ + if (value) + brw->predicate.state = BRW_PREDICATE_STATE_RENDER; + else + brw->predicate.state = BRW_PREDICATE_STATE_DONT_RENDER; +} + +static void +set_predicate_for_result(struct brw_context *brw, + struct brw_query_object *query, + bool inverted) +{ + int load_op; + + assert(query->bo != NULL); + + brw_load_register_mem64(brw, + MI_PREDICATE_SRC0, + query->bo, + I915_GEM_DOMAIN_INSTRUCTION, + 0, /* write domain */ + 0 /* offset */); + brw_load_register_mem64(brw, + MI_PREDICATE_SRC1, + query->bo, + I915_GEM_DOMAIN_INSTRUCTION, + 0, /* write domain */ + 8 /* offset */); + + if (inverted) + load_op = MI_PREDICATE_LOADOP_LOAD; + else + load_op = MI_PREDICATE_LOADOP_LOADINV; + + BEGIN_BATCH(1); + OUT_BATCH(GEN7_MI_PREDICATE | + load_op | + MI_PREDICATE_COMBINEOP_SET | + MI_PREDICATE_COMPAREOP_SRCS_EQUAL); + ADVANCE_BATCH(); + + brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT; +} + +static void +brw_begin_conditional_render(struct gl_context *ctx, + struct gl_query_object *q, + GLenum mode) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_query_object *query = (struct brw_query_object *) q; + bool inverted; + + if (!brw->predicate.supported) + return; + + switch (mode) { + case GL_QUERY_WAIT: + case GL_QUERY_NO_WAIT: + case GL_QUERY_BY_REGION_WAIT: + case GL_QUERY_BY_REGION_NO_WAIT: + inverted = false; + break; + case GL_QUERY_WAIT_INVERTED: + case GL_QUERY_NO_WAIT_INVERTED: + case GL_QUERY_BY_REGION_WAIT_INVERTED: + case GL_QUERY_BY_REGION_NO_WAIT_INVERTED: + inverted = true; + break; + default: + unreachable("Unexpected conditional render mode"); + } + + /* If there are already samples from a BLT operation or if the query object + * is ready then we can avoid looking at the values in the buffer and just + * decide whether to draw using the CPU without stalling. + */ + if (query->Base.Result || query->Base.Ready) + set_predicate_enable(brw, (query->Base.Result != 0) ^ inverted); + else + set_predicate_for_result(brw, query, inverted); +} + +static void +brw_end_conditional_render(struct gl_context *ctx, + struct gl_query_object *q) +{ + struct brw_context *brw = brw_context(ctx); + + /* When there is no longer a conditional render in progress it should + * always render. + */ + brw->predicate.state = BRW_PREDICATE_STATE_RENDER; +} + +void +brw_init_conditional_render_functions(struct dd_function_table *functions) +{ + functions->BeginConditionalRender = brw_begin_conditional_render; + functions->EndConditionalRender = brw_end_conditional_render; +} + +bool +brw_check_conditional_render(struct brw_context *brw) +{ + if (brw->predicate.supported) { + /* In some cases it is possible to determine that the primitives should + * be skipped without needing the predicate enable bit and still without + * stalling. 
+ */ + return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER; + } else if (brw->ctx.Query.CondRenderQuery) { + perf_debug("Conditional rendering is implemented in software and may " + "stall.\n"); + return _mesa_check_conditional_render(&brw->ctx); + } else { + return true; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 23838056690..ebf12fab69e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -50,6 +50,7 @@ #include "brw_context.h" #include "brw_defines.h" +#include "brw_shader.h" #include "brw_draw.h" #include "brw_state.h" @@ -68,8 +69,6 @@ #include "tnl/t_pipeline.h" #include "util/ralloc.h" -#include "glsl/nir/nir.h" - /*************************************** * Mesa's Driver Functions ***************************************/ @@ -289,6 +288,8 @@ brw_init_driver_functions(struct brw_context *brw, else gen4_init_queryobj_functions(functions); brw_init_compute_functions(functions); + if (brw->gen >= 7) + brw_init_conditional_render_functions(functions); functions->QuerySamplesForFormat = brw_query_samples_for_format; @@ -427,11 +428,7 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MinLineWidth = 1.0; ctx->Const.MinLineWidthAA = 1.0; - if (brw->gen >= 9 || brw->is_cherryview) { - ctx->Const.MaxLineWidth = 40.0; - ctx->Const.MaxLineWidthAA = 40.0; - ctx->Const.LineWidthGranularity = 0.125; - } else if (brw->gen >= 6) { + if (brw->gen >= 6) { ctx->Const.MaxLineWidth = 7.375; ctx->Const.MaxLineWidthAA = 7.375; ctx->Const.LineWidthGranularity = 0.125; @@ -441,6 +438,13 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.LineWidthGranularity = 0.5; } + /* For non-antialiased lines, we have to round the line width to the + * nearest whole number. Make sure that we don't advertise a line + * width that, when rounded, will be beyond the actual hardware + * maximum. + */ + assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth); + ctx->Const.MinPointSize = 1.0; ctx->Const.MinPointSizeAA = 1.0; ctx->Const.MaxPointSize = 255.0; @@ -544,6 +548,7 @@ brw_initialize_context_constants(struct brw_context *brw) */ ctx->Const.UniformBufferOffsetAlignment = 16; ctx->Const.TextureBufferOffsetAlignment = 16; + ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024; if (brw->gen >= 6) { ctx->Const.MaxVarying = 32; @@ -553,51 +558,12 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128; } - static const nir_shader_compiler_options nir_options = { - .native_integers = true, - /* In order to help allow for better CSE at the NIR level we tell NIR - * to split all ffma instructions during opt_algebraic and we then - * re-combine them as a later step. - */ - .lower_ffma = true, - .lower_sub = true, - }; - /* We want the GLSL compiler to emit code that uses condition codes */ for (int i = 0; i < MESA_SHADER_STAGES; i++) { - ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 
16 : UINT_MAX; - ctx->Const.ShaderCompilerOptions[i].EmitCondCodes = true; - ctx->Const.ShaderCompilerOptions[i].EmitNoNoise = true; - ctx->Const.ShaderCompilerOptions[i].EmitNoMainReturn = true; - ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectInput = true; - ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectOutput = - (i == MESA_SHADER_FRAGMENT); - ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectTemp = - (i == MESA_SHADER_FRAGMENT); - ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false; - ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true; + ctx->Const.ShaderCompilerOptions[i] = + brw->intelScreen->compiler->glsl_compiler_options[i]; } - ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true; - ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true; - - if (brw->scalar_vs) { - /* If we're using the scalar backend for vertex shaders, we need to - * configure these accordingly. - */ - ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true; - ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true; - ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false; - - if (brw_env_var_as_boolean("INTEL_USE_NIR", true)) - ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions = &nir_options; - } - - if (brw_env_var_as_boolean("INTEL_USE_NIR", true)) - ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions = &nir_options; - - ctx->Const.ShaderCompilerOptions[MESA_SHADER_COMPUTE].NirOptions = &nir_options; - /* ARB_viewport_array */ if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) { ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS; @@ -612,6 +578,12 @@ brw_initialize_context_constants(struct brw_context *brw) /* ARB_gpu_shader5 */ if (brw->gen >= 7) ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS); + + /* ARB_framebuffer_no_attachments */ + ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth; + ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight; + ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers; + ctx->Const.MaxFramebufferSamples = max_samples; } static void @@ -814,10 +786,9 @@ brwCreateContext(gl_api api, _mesa_meta_init(ctx); brw_process_driconf_options(brw); - brw_process_intel_debug_variable(brw); - if (brw->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS)) - brw->scalar_vs = true; + if (INTEL_DEBUG & DEBUG_PERF) + brw->perf_debug = true; brw_initialize_context_constants(brw); @@ -894,6 +865,8 @@ brwCreateContext(gl_api api, brw->gs.enabled = false; brw->sf.viewport_transform_enable = true; + brw->predicate.state = BRW_PREDICATE_STATE_RENDER; + ctx->VertexProgram._MaintainTnlProgram = true; ctx->FragmentProgram._MaintainTexEnvProgram = true; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index cb4cc7fb36b..9e1f722df9e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -611,6 +611,12 @@ struct brw_ff_gs_prog_data { unsigned svbi_postincrement_value; }; +enum shader_dispatch_mode { + DISPATCH_MODE_4X1_SINGLE = 0, + DISPATCH_MODE_4X2_DUAL_INSTANCE = 1, + DISPATCH_MODE_4X2_DUAL_OBJECT = 2, + DISPATCH_MODE_SIMD8 = 3, +}; /* Note: brw_vue_prog_data_compare() must be updated when adding fields to * this struct! 
@@ -628,7 +634,7 @@ struct brw_vue_prog_data { */ GLuint urb_entry_size; - bool simd8; + enum shader_dispatch_mode dispatch_mode; }; @@ -726,14 +732,6 @@ struct brw_gs_prog_data int invocations; /** - * Dispatch mode, can be any of: - * GEN7_GS_DISPATCH_MODE_DUAL_OBJECT - * GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE - * GEN7_GS_DISPATCH_MODE_SINGLE - */ - int dispatch_mode; - - /** * Gen6 transform feedback enabled flag. */ bool gen6_xfb_enabled; @@ -829,20 +827,10 @@ struct brw_tracked_state { enum shader_time_shader_type { ST_NONE, ST_VS, - ST_VS_WRITTEN, - ST_VS_RESET, ST_GS, - ST_GS_WRITTEN, - ST_GS_RESET, ST_FS8, - ST_FS8_WRITTEN, - ST_FS8_RESET, ST_FS16, - ST_FS16_WRITTEN, - ST_FS16_RESET, ST_CS, - ST_CS_WRITTEN, - ST_CS_RESET, }; struct brw_vertex_buffer { @@ -972,6 +960,22 @@ struct brw_stage_state uint32_t sampler_offset; }; +enum brw_predicate_state { + /* The first two states are used if we can determine whether to draw + * without having to look at the values in the query object buffer. This + * will happen if there is no conditional render in progress, if the query + * object is already completed or if something else has already added + * samples to the preliminary result such as via a BLT command. + */ + BRW_PREDICATE_STATE_RENDER, + BRW_PREDICATE_STATE_DONT_RENDER, + /* In this case whether to draw or not depends on the result of an + * MI_PREDICATE command so the predicate enable bit needs to be checked. + */ + BRW_PREDICATE_STATE_USE_BIT +}; + +struct shader_times; /** * brw_context is derived from gl_context. @@ -1131,7 +1135,6 @@ struct brw_context bool has_pln; bool no_simd8; bool use_rep_send; - bool scalar_vs; /** * Some versions of Gen hardware don't do centroid interpolation correctly @@ -1408,6 +1411,11 @@ struct brw_context } query; struct { + enum brw_predicate_state state; + bool supported; + } predicate; + + struct { /** A map from pipeline statistics counter IDs to MMIO addresses. 
*/ const int *statistics_registers; @@ -1453,6 +1461,7 @@ struct brw_context uint32_t offset; uint32_t size; enum aub_state_struct_type type; + int index; } *state_batch_list; int state_batch_count; @@ -1492,7 +1501,7 @@ struct brw_context const char **names; int *ids; enum shader_time_shader_type *types; - uint64_t *cumulative; + struct shader_times *cumulative; int num_entries; int max_entries; double report_time; @@ -1606,12 +1615,21 @@ void brw_write_depth_count(struct brw_context *brw, drm_intel_bo *bo, int idx); void brw_store_register_mem64(struct brw_context *brw, drm_intel_bo *bo, uint32_t reg, int idx); +/** brw_conditional_render.c */ +void brw_init_conditional_render_functions(struct dd_function_table *functions); +bool brw_check_conditional_render(struct brw_context *brw); + /** intel_batchbuffer.c */ void brw_load_register_mem(struct brw_context *brw, uint32_t reg, drm_intel_bo *bo, uint32_t read_domains, uint32_t write_domain, uint32_t offset); +void brw_load_register_mem64(struct brw_context *brw, + uint32_t reg, + drm_intel_bo *bo, + uint32_t read_domains, uint32_t write_domain, + uint32_t offset); /*====================================================================== * brw_state_dump.c @@ -1991,6 +2009,10 @@ void intel_context_destroy(struct brw_context *brw); void brw_initialize_context_constants(struct brw_context *brw); +bool +gen9_use_linear_1d_layout(const struct brw_context *brw, + const struct intel_mipmap_tree *mt); + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index 2432875d0f4..42a082b57b6 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -88,9 +88,15 @@ brw_cs_emit(struct brw_context *brw, cfg_t *cfg = NULL; const char *fail_msg = NULL; + int st_index = -1; + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS); + /* Now the main event: Visit the shader IR and generate our CS IR for it. 
*/ - fs_visitor v8(brw, mem_ctx, key, prog_data, prog, cp, 8); + fs_visitor v8(brw->intelScreen->compiler, brw, + mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog, + &cp->Base, 8, st_index); if (!v8.run_cs()) { fail_msg = v8.fail_msg; } else if (local_workgroup_size <= 8 * brw->max_cs_threads) { @@ -98,7 +104,9 @@ brw_cs_emit(struct brw_context *brw, prog_data->simd_size = 8; } - fs_visitor v16(brw, mem_ctx, key, prog_data, prog, cp, 16); + fs_visitor v16(brw->intelScreen->compiler, brw, + mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog, + &cp->Base, 16, st_index); if (likely(!(INTEL_DEBUG & DEBUG_NO16)) && !fail_msg && !v8.simd16_unsupported && local_workgroup_size <= 16 * brw->max_cs_threads) { @@ -126,7 +134,8 @@ brw_cs_emit(struct brw_context *brw, return NULL; } - fs_generator g(brw, mem_ctx, (void*) key, &prog_data->base, &cp->Base, + fs_generator g(brw->intelScreen->compiler, brw, + mem_ctx, (void*) key, &prog_data->base, &cp->Base, v8.promoted_constants, v8.runtime_check_aads_emit, "CS"); if (INTEL_DEBUG & DEBUG_CS) { char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d", @@ -368,9 +377,11 @@ brw_upload_cs_state(struct brw_context *brw) extern "C" const struct brw_tracked_state brw_cs_state = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_CS_PROG_DATA, + /* explicit initialisers aren't valid C++, comment + * them for documentation purposes */ + /* .dirty = */{ + /* .mesa = */ 0, + /* .brw = */ BRW_NEW_CS_PROG_DATA, }, - .emit = brw_upload_cs_state + /* .emit = */ brw_upload_cs_state }; diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp index 03f838dd9ae..61f25811cb2 100644 --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp @@ -36,11 +36,11 @@ * - if/else/endif */ bool -dead_control_flow_eliminate(backend_visitor *v) +dead_control_flow_eliminate(backend_shader *s) { bool progress = false; - foreach_block_safe (block, v->cfg) { + foreach_block_safe (block, s->cfg) { bblock_t *if_block = NULL, *else_block = NULL, *endif_block = block; bool found = false; @@ -115,7 +115,7 @@ dead_control_flow_eliminate(backend_visitor *v) } if (progress) - v->invalidate_live_intervals(); + s->invalidate_live_intervals(); return progress; } diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.h b/src/mesa/drivers/dri/i965/brw_dead_control_flow.h index 57a4dabc83c..83fd9b1e79e 100644 --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.h +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.h @@ -23,4 +23,4 @@ #include "brw_shader.h" -bool dead_control_flow_eliminate(backend_visitor *v); +bool dead_control_flow_eliminate(backend_shader *s); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 3c704ee9d08..c113d52a3d3 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -38,6 +38,7 @@ fieldval & field ## _MASK; \ }) +#define GET_BITS(data, high, low) ((data & INTEL_MASK((high), (low))) >> (low)) #define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) #ifndef BRW_DEFINES_H @@ -51,6 +52,7 @@ # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15) # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15) # define GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE (1 << 10) +# define GEN7_3DPRIM_PREDICATE_ENABLE (1 << 8) /* DW1 */ # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) # define 
GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) @@ -530,9 +532,11 @@ #define GEN7_SURFACE_ARYSPC_FULL (0 << 10) #define GEN7_SURFACE_ARYSPC_LOD0 (1 << 10) -/* Surface state DW0 */ +/* Surface state DW1 */ #define GEN8_SURFACE_MOCS_SHIFT 24 #define GEN8_SURFACE_MOCS_MASK INTEL_MASK(30, 24) +#define GEN8_SURFACE_QPITCH_SHIFT 0 +#define GEN8_SURFACE_QPITCH_MASK INTEL_MASK(14, 0) /* Surface state DW2 */ #define BRW_SURFACE_HEIGHT_SHIFT 19 @@ -590,6 +594,15 @@ #define GEN7_SURFACE_MOCS_SHIFT 16 #define GEN7_SURFACE_MOCS_MASK INTEL_MASK(19, 16) +#define GEN9_SURFACE_TRMODE_SHIFT 18 +#define GEN9_SURFACE_TRMODE_MASK INTEL_MASK(19, 18) +#define GEN9_SURFACE_TRMODE_NONE 0 +#define GEN9_SURFACE_TRMODE_TILEYF 1 +#define GEN9_SURFACE_TRMODE_TILEYS 2 + +#define GEN9_SURFACE_MIP_TAIL_START_LOD_SHIFT 8 +#define GEN9_SURFACE_MIP_TAIL_START_LOD_MASK INTEL_MASK(11, 8) + /* Surface state DW6 */ #define GEN7_SURFACE_MCS_ENABLE (1 << 0) #define GEN7_SURFACE_MCS_PITCH_SHIFT 3 @@ -606,6 +619,8 @@ #define GEN8_SURFACE_AUX_MODE_HIZ 3 /* Surface state DW7 */ +#define GEN9_SURFACE_RT_COMPRESSION_SHIFT 30 +#define GEN9_SURFACE_RT_COMPRESSION_MASK INTEL_MASK(30, 30) #define GEN7_SURFACE_CLEAR_COLOR_SHIFT 28 #define GEN7_SURFACE_SCS_R_SHIFT 25 #define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25) @@ -1131,6 +1146,11 @@ enum opcode { * Terminate the compute shader. */ CS_OPCODE_CS_TERMINATE, + + /** + * GLSL barrier() + */ + SHADER_OPCODE_BARRIER, }; enum brw_urb_write_flags { @@ -1592,6 +1612,14 @@ enum brw_message_target { #define BRW_SCRATCH_SPACE_SIZE_1M 10 #define BRW_SCRATCH_SPACE_SIZE_2M 11 +#define BRW_MESSAGE_GATEWAY_SFID_OPEN_GATEWAY 0 +#define BRW_MESSAGE_GATEWAY_SFID_CLOSE_GATEWAY 1 +#define BRW_MESSAGE_GATEWAY_SFID_FORWARD_MSG 2 +#define BRW_MESSAGE_GATEWAY_SFID_GET_TIMESTAMP 3 +#define BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG 4 +#define BRW_MESSAGE_GATEWAY_SFID_UPDATE_GATEWAY_STATE 5 +#define BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE 6 + #define CMD_URB_FENCE 0x6000 #define CMD_CS_URB_STATE 0x6001 @@ -1769,9 +1797,8 @@ enum brw_message_target { # define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1 # define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT 20 # define GEN7_GS_INSTANCE_CONTROL_SHIFT 15 -# define GEN7_GS_DISPATCH_MODE_SINGLE (0 << 11) -# define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE (1 << 11) -# define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT (2 << 11) +# define GEN7_GS_DISPATCH_MODE_SHIFT 11 +# define GEN7_GS_DISPATCH_MODE_MASK INTEL_MASK(12, 11) # define GEN6_GS_STATISTICS_ENABLE (1 << 10) # define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9) # define GEN6_GS_RENDERING_ENABLE (1 << 8) @@ -2470,8 +2497,8 @@ enum brw_wm_barycentric_interp_mode { * cache settings. We still use only either write-back or write-through; and * rely on the documented default values. 
*/ -#define SKL_MOCS_WB 9 -#define SKL_MOCS_WT 5 +#define SKL_MOCS_WB (0b001001 << 1) +#define SKL_MOCS_WT (0b000101 << 1) #define MEDIA_VFE_STATE 0x7000 /* GEN7 DW2, GEN8+ DW3 */ diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 95e262a361b..1075c5acba5 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -402,6 +402,16 @@ static const char *const gen6_sfid[16] = { [HSW_SFID_CRE] = "cre", }; +static const char *const gen7_gateway_subfuncid[8] = { + [BRW_MESSAGE_GATEWAY_SFID_OPEN_GATEWAY] = "open", + [BRW_MESSAGE_GATEWAY_SFID_CLOSE_GATEWAY] = "close", + [BRW_MESSAGE_GATEWAY_SFID_FORWARD_MSG] = "forward msg", + [BRW_MESSAGE_GATEWAY_SFID_GET_TIMESTAMP] = "get timestamp", + [BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG] = "barrier msg", + [BRW_MESSAGE_GATEWAY_SFID_UPDATE_GATEWAY_STATE] = "update state", + [BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE] = "mmio read/write", +}; + static const char *const dp_write_port_msg_type[8] = { [0b000] = "OWord block write", [0b001] = "OWord dual block write", @@ -977,13 +987,14 @@ src0_3src(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) brw_inst_3src_src0_reg_nr(devinfo, inst)); if (err == -1) return 0; - if (src0_subreg_nr) + if (src0_subreg_nr || brw_inst_3src_src0_rep_ctrl(devinfo, inst)) format(file, ".%d", src0_subreg_nr); if (brw_inst_3src_src0_rep_ctrl(devinfo, inst)) string(file, "<0,1,0>"); - else + else { string(file, "<4,4,1>"); - err |= src_swizzle(file, brw_inst_3src_src0_swizzle(devinfo, inst)); + err |= src_swizzle(file, brw_inst_3src_src0_swizzle(devinfo, inst)); + } err |= control(file, "src da16 reg type", three_source_reg_encoding, brw_inst_3src_src_type(devinfo, inst), NULL); return err; @@ -1003,13 +1014,14 @@ src1_3src(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) brw_inst_3src_src1_reg_nr(devinfo, inst)); if (err == -1) return 0; - if (src1_subreg_nr) + if (src1_subreg_nr || brw_inst_3src_src1_rep_ctrl(devinfo, inst)) format(file, ".%d", src1_subreg_nr); if (brw_inst_3src_src1_rep_ctrl(devinfo, inst)) string(file, "<0,1,0>"); - else + else { string(file, "<4,4,1>"); - err |= src_swizzle(file, brw_inst_3src_src1_swizzle(devinfo, inst)); + err |= src_swizzle(file, brw_inst_3src_src1_swizzle(devinfo, inst)); + } err |= control(file, "src da16 reg type", three_source_reg_encoding, brw_inst_3src_src_type(devinfo, inst), NULL); return err; @@ -1030,13 +1042,14 @@ src2_3src(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) brw_inst_3src_src2_reg_nr(devinfo, inst)); if (err == -1) return 0; - if (src2_subreg_nr) + if (src2_subreg_nr || brw_inst_3src_src2_rep_ctrl(devinfo, inst)) format(file, ".%d", src2_subreg_nr); if (brw_inst_3src_src2_rep_ctrl(devinfo, inst)) string(file, "<0,1,0>"); - else + else { string(file, "<4,4,1>"); - err |= src_swizzle(file, brw_inst_3src_src2_swizzle(devinfo, inst)); + err |= src_swizzle(file, brw_inst_3src_src2_swizzle(devinfo, inst)); + } err |= control(file, "src da16 reg type", three_source_reg_encoding, brw_inst_3src_src_type(devinfo, inst), NULL); return err; @@ -1495,6 +1508,12 @@ brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo, break; case BRW_SFID_THREAD_SPAWNER: break; + + case BRW_SFID_MESSAGE_GATEWAY: + format(file, " (%s)", + gen7_gateway_subfuncid[brw_inst_gateway_subfuncid(devinfo, inst)]); + break; + case GEN7_SFID_DATAPORT_DATA_CACHE: if (devinfo->gen >= 7) { format(file, " ("); diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c index 96e23697923..b91597a9f5d 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -92,8 +92,10 @@ get_hw_prim_for_gl_prim(int mode) { if (mode >= BRW_PRIM_OFFSET) return mode - BRW_PRIM_OFFSET; - else + else { + assert(mode < ARRAY_SIZE(prim_to_hw_prim)); return prim_to_hw_prim[mode]; + } } @@ -178,6 +180,7 @@ static void brw_emit_prim(struct brw_context *brw, int verts_per_instance; int vertex_access_type; int indirect_flag; + int predicate_enable; DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), prim->start, prim->count); @@ -258,10 +261,14 @@ static void brw_emit_prim(struct brw_context *brw, indirect_flag = 0; } - if (brw->gen >= 7) { + if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) + predicate_enable = GEN7_3DPRIM_PREDICATE_ENABLE; + else + predicate_enable = 0; + BEGIN_BATCH(7); - OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag); + OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable); OUT_BATCH(hw_prim | vertex_access_type); } else { BEGIN_BATCH(6); @@ -561,12 +568,7 @@ void brw_draw_prims( struct gl_context *ctx, assert(unused_tfb_object == NULL); - if (ctx->Query.CondRenderQuery) { - perf_debug("Conditional rendering is implemented in software and may " - "stall. This should be fixed in the driver.\n"); - } - - if (!_mesa_check_conditional_render(ctx)) + if (!brw_check_conditional_render(brw)) return; /* Handle primitive restart if needed */ diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 0e7be1e1ea0..761aa0ec5fa 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -361,6 +361,8 @@ brw_jump_scale(const struct brw_device_info *devinfo) return 1; } +void brw_barrier(struct brw_codegen *p, struct brw_reg src); + /* If/else/endif. Works by manipulating the execution flags on each * channel. */ @@ -390,6 +392,8 @@ brw_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index, void brw_NOP(struct brw_codegen *p); +void brw_WAIT(struct brw_codegen *p); + /* Special case: there is never a destination, execution size will be * taken from src0: */ diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 69cb114b945..67f0b45ac04 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -849,6 +849,12 @@ set_3src_source_index(const struct brw_device_info *devinfo, static bool has_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src) { + /* EOT can only be mapped on a send if the src1 is an immediate */ + if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC || + brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) && + brw_inst_eot(devinfo, src)) + return true; + /* Check for instruction bits that don't map to any of the fields of the * compacted instruction. The instruction cannot be compacted if any of * them are set. 
They overlap with: diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index e78d0bec268..0f536046f6f 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -914,6 +914,8 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, brw_inst_set_3src_src_type(devinfo, inst, BRW_3SRC_TYPE_UD); brw_inst_set_3src_dst_type(devinfo, inst, BRW_3SRC_TYPE_UD); break; + default: + unreachable("not reached"); } } @@ -3404,3 +3406,54 @@ void brw_shader_time_add(struct brw_codegen *p, brw_pop_insn_state(p); } + + +/** + * Emit the SEND message for a barrier + */ +void +brw_barrier(struct brw_codegen *p, struct brw_reg src) +{ + const struct brw_device_info *devinfo = p->devinfo; + struct brw_inst *inst; + + assert(devinfo->gen >= 7); + + inst = next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, inst, brw_null_reg()); + brw_set_src0(p, inst, src); + brw_set_src1(p, inst, brw_null_reg()); + + brw_set_message_descriptor(p, inst, BRW_SFID_MESSAGE_GATEWAY, + 1 /* msg_length */, + 0 /* response_length */, + false /* header_present */, + false /* end_of_thread */); + + brw_inst_set_gateway_notify(devinfo, inst, 1); + brw_inst_set_gateway_subfuncid(devinfo, inst, + BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG); + + brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE); +} + + +/** + * Emit the wait instruction for a barrier + */ +void +brw_WAIT(struct brw_codegen *p) +{ + const struct brw_device_info *devinfo = p->devinfo; + struct brw_inst *insn; + + struct brw_reg src = brw_notification_reg(); + + insn = next_insn(p, BRW_OPCODE_WAIT); + brw_set_dest(p, insn, src); + brw_set_src0(p, insn, src); + brw_set_src1(p, insn, brw_null_reg()); + + brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); + brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); +} diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5ce1dfc6633..2c0ff961182 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -49,6 +49,8 @@ #include "glsl/glsl_types.h" #include "program/sampler.h" +using namespace brw; + void fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, const fs_reg *src, unsigned sources) @@ -212,152 +214,13 @@ fs_inst::resize_sources(uint8_t num_sources) } } -#define ALU1(op) \ - fs_inst * \ - fs_visitor::op(const fs_reg &dst, const fs_reg &src0) \ - { \ - return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0); \ - } - -#define ALU2(op) \ - fs_inst * \ - fs_visitor::op(const fs_reg &dst, const fs_reg &src0, \ - const fs_reg &src1) \ - { \ - return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1); \ - } - -#define ALU2_ACC(op) \ - fs_inst * \ - fs_visitor::op(const fs_reg &dst, const fs_reg &src0, \ - const fs_reg &src1) \ - { \ - fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);\ - inst->writes_accumulator = true; \ - return inst; \ - } - -#define ALU3(op) \ - fs_inst * \ - fs_visitor::op(const fs_reg &dst, const fs_reg &src0, \ - const fs_reg &src1, const fs_reg &src2) \ - { \ - return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\ - } - -ALU1(NOT) -ALU1(MOV) -ALU1(FRC) -ALU1(RNDD) -ALU1(RNDE) -ALU1(RNDZ) -ALU2(ADD) -ALU2(MUL) -ALU2_ACC(MACH) -ALU2(AND) -ALU2(OR) -ALU2(XOR) -ALU2(SHL) -ALU2(SHR) -ALU2(ASR) -ALU3(LRP) -ALU1(BFREV) -ALU3(BFE) -ALU2(BFI1) -ALU3(BFI2) -ALU1(FBH) -ALU1(FBL) -ALU1(CBIT) -ALU3(MAD) -ALU2_ACC(ADDC) -ALU2_ACC(SUBB) -ALU2(SEL) -ALU2(MAC) - -/** Gen4 
predicated IF. */ -fs_inst * -fs_visitor::IF(enum brw_predicate predicate) -{ - fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width); - inst->predicate = predicate; - return inst; -} - -/** Gen6 IF with embedded comparison. */ -fs_inst * -fs_visitor::IF(const fs_reg &src0, const fs_reg &src1, - enum brw_conditional_mod condition) -{ - assert(devinfo->gen == 6); - fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width, - reg_null_d, src0, src1); - inst->conditional_mod = condition; - return inst; -} - -/** - * CMP: Sets the low bit of the destination channels with the result - * of the comparison, while the upper bits are undefined, and updates - * the flag register with the packed 16 bits of the result. - */ -fs_inst * -fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1, - enum brw_conditional_mod condition) -{ - fs_inst *inst; - - /* Take the instruction: - * - * CMP null<d> src0<f> src1<f> - * - * Original gen4 does type conversion to the destination type before - * comparison, producing garbage results for floating point comparisons. - * - * The destination type doesn't matter on newer generations, so we set the - * type to match src0 so we can compact the instruction. - */ - dst.type = src0.type; - if (dst.file == HW_REG) - dst.fixed_hw_reg.type = dst.type; - - resolve_ud_negate(&src0); - resolve_ud_negate(&src1); - - inst = new(mem_ctx) fs_inst(BRW_OPCODE_CMP, dst, src0, src1); - inst->conditional_mod = condition; - - return inst; -} - -fs_inst * -fs_visitor::LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources, - int header_size) -{ - assert(dst.width % 8 == 0); - fs_inst *inst = new(mem_ctx) fs_inst(SHADER_OPCODE_LOAD_PAYLOAD, dst.width, - dst, src, sources); - inst->header_size = header_size; - - for (int i = 0; i < header_size; i++) - assert(src[i].file != GRF || src[i].width * type_sz(src[i].type) == 32); - inst->regs_written = header_size; - - for (int i = header_size; i < sources; ++i) - assert(src[i].file != GRF || src[i].width == dst.width); - inst->regs_written += (sources - header_size) * (dst.width / 8); - - return inst; -} - -exec_list -fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst, +void +fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, + const fs_reg &dst, const fs_reg &surf_index, const fs_reg &varying_offset, uint32_t const_offset) { - exec_list instructions; - fs_inst *inst; - /* We have our constant surface use a pitch of 4 bytes, so our index can * be any component of a vector, and then we load 4 contiguous * components starting from that. @@ -370,8 +233,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst, * the redundant ones. 
*/ fs_reg vec4_offset = vgrf(glsl_type::int_type); - instructions.push_tail(ADD(vec4_offset, - varying_offset, fs_reg(const_offset & ~3))); + bld.ADD(vec4_offset, varying_offset, fs_reg(const_offset & ~3)); int scale = 1; if (devinfo->gen == 4 && dst.width == 8) { @@ -393,9 +255,8 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst, int regs_written = 4 * (dst.width / 8) * scale; fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written), dst.type, dst.width); - inst = new(mem_ctx) fs_inst(op, vec4_result, surf_index, vec4_offset); + fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset); inst->regs_written = regs_written; - instructions.push_tail(inst); if (devinfo->gen < 7) { inst->base_mrf = 13; @@ -406,30 +267,23 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst, inst->mlen = 1 + dispatch_width / 8; } - fs_reg result = offset(vec4_result, (const_offset & 3) * scale); - instructions.push_tail(MOV(dst, result)); - - return instructions; + bld.MOV(dst, offset(vec4_result, (const_offset & 3) * scale)); } /** * A helper for MOV generation for fixing up broken hardware SEND dependency * handling. */ -fs_inst * -fs_visitor::DEP_RESOLVE_MOV(int grf) +void +fs_visitor::DEP_RESOLVE_MOV(const fs_builder &bld, int grf) { - fs_inst *inst = MOV(brw_null_reg(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F)); - - inst->ir = NULL; - inst->annotation = "send dependency resolve"; - /* The caller always wants uncompressed to emit the minimal extra * dependencies, and to avoid having to deal with aligning its regs to 2. */ - inst->exec_size = 8; + const fs_builder ubld = bld.annotate("send dependency resolve") + .half(0); - return inst; + ubld.MOV(ubld.null_reg_f(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F)); } bool @@ -685,7 +539,7 @@ fs_visitor::type_size(const struct glsl_type *type) * the destination of the MOV, with extra parameters set. */ fs_reg -fs_visitor::get_timestamp(fs_inst **out_mov) +fs_visitor::get_timestamp(const fs_builder &bld) { assert(devinfo->gen >= 7); @@ -696,11 +550,10 @@ fs_visitor::get_timestamp(fs_inst **out_mov) fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 4); - fs_inst *mov = MOV(dst, ts); /* We want to read the 3 fields we care about even if it's not enabled in * the dispatch. */ - mov->force_writemask_all = true; + bld.exec_all().MOV(dst, ts); /* The caller wants the low 32 bits of the timestamp. 
Since it's running * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds, @@ -714,105 +567,60 @@ fs_visitor::get_timestamp(fs_inst **out_mov) */ dst.set_smear(0); - *out_mov = mov; return dst; } void fs_visitor::emit_shader_time_begin() { - current_annotation = "shader time start"; - fs_inst *mov; - shader_start_time = get_timestamp(&mov); - emit(mov); + shader_start_time = get_timestamp(bld.annotate("shader time start")); } void fs_visitor::emit_shader_time_end() { - current_annotation = "shader time end"; - - enum shader_time_shader_type type, written_type, reset_type; - switch (stage) { - case MESA_SHADER_VERTEX: - type = ST_VS; - written_type = ST_VS_WRITTEN; - reset_type = ST_VS_RESET; - break; - case MESA_SHADER_GEOMETRY: - type = ST_GS; - written_type = ST_GS_WRITTEN; - reset_type = ST_GS_RESET; - break; - case MESA_SHADER_FRAGMENT: - if (dispatch_width == 8) { - type = ST_FS8; - written_type = ST_FS8_WRITTEN; - reset_type = ST_FS8_RESET; - } else { - assert(dispatch_width == 16); - type = ST_FS16; - written_type = ST_FS16_WRITTEN; - reset_type = ST_FS16_RESET; - } - break; - case MESA_SHADER_COMPUTE: - type = ST_CS; - written_type = ST_CS_WRITTEN; - reset_type = ST_CS_RESET; - break; - default: - unreachable("fs_visitor::emit_shader_time_end missing code"); - } - /* Insert our code just before the final SEND with EOT. */ exec_node *end = this->instructions.get_tail(); assert(end && ((fs_inst *) end)->eot); + const fs_builder ibld = bld.annotate("shader time end") + .exec_all().at(NULL, end); - fs_inst *tm_read; - fs_reg shader_end_time = get_timestamp(&tm_read); - end->insert_before(tm_read); + fs_reg shader_end_time = get_timestamp(ibld); /* Check that there weren't any timestamp reset events (assuming these * were the only two timestamp reads that happened). */ fs_reg reset = shader_end_time; reset.set_smear(2); - fs_inst *test = AND(reg_null_d, reset, fs_reg(1u)); - test->conditional_mod = BRW_CONDITIONAL_Z; - test->force_writemask_all = true; - end->insert_before(test); - end->insert_before(IF(BRW_PREDICATE_NORMAL)); + set_condmod(BRW_CONDITIONAL_Z, + ibld.AND(ibld.null_reg_ud(), reset, fs_reg(1u))); + ibld.IF(BRW_PREDICATE_NORMAL); fs_reg start = shader_start_time; start.negate = true; fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1); diff.set_smear(0); - fs_inst *add = ADD(diff, start, shader_end_time); - add->force_writemask_all = true; - end->insert_before(add); + ibld.ADD(diff, start, shader_end_time); /* If there were no instructions between the two timestamp gets, the diff * is 2 cycles. Remove that overhead, so I can forget about that when * trying to determine the time taken for single instructions. 
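The rollover figure and the fixed overhead quoted in these comments are easy to sanity-check; a minimal arithmetic sketch, with an approximate clock rate and purely illustrative names:

    /* Sketch: a 32-bit timestamp at ~1.2 GHz wraps after 2^32 / 1.2e9 ≈ 3.6 s,
     * so only short intervals are meaningful.  The measured interval also
     * drops the fixed 2-cycle cost of the two timestamp reads themselves
     * (the ADD of -2u in the code below). */
    static inline uint32_t
    shader_cycles(uint32_t start_ts, uint32_t end_ts)
    {
       return end_ts - start_ts - 2;   /* unsigned math tolerates one wrap */
    }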
*/ - add = ADD(diff, diff, fs_reg(-2u)); - add->force_writemask_all = true; - end->insert_before(add); - - end->insert_before(SHADER_TIME_ADD(type, diff)); - end->insert_before(SHADER_TIME_ADD(written_type, fs_reg(1u))); - end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ELSE, dispatch_width)); - end->insert_before(SHADER_TIME_ADD(reset_type, fs_reg(1u))); - end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ENDIF, dispatch_width)); + ibld.ADD(diff, diff, fs_reg(-2u)); + SHADER_TIME_ADD(ibld, 0, diff); + SHADER_TIME_ADD(ibld, 1, fs_reg(1u)); + ibld.emit(BRW_OPCODE_ELSE); + SHADER_TIME_ADD(ibld, 2, fs_reg(1u)); + ibld.emit(BRW_OPCODE_ENDIF); } -fs_inst * -fs_visitor::SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg value) +void +fs_visitor::SHADER_TIME_ADD(const fs_builder &bld, + int shader_time_subindex, + fs_reg value) { - int shader_time_index = - brw_get_shader_time_index(brw, shader_prog, prog, type); - fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE); + int index = shader_time_index * 3 + shader_time_subindex; + fs_reg offset = fs_reg(index * SHADER_TIME_STRIDE); fs_reg payload; if (dispatch_width == 8) @@ -820,8 +628,7 @@ fs_visitor::SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg value) else payload = vgrf(glsl_type::uint_type); - return new(mem_ctx) fs_inst(SHADER_OPCODE_SHADER_TIME_ADD, - fs_reg(), payload, offset, value); + bld.emit(SHADER_OPCODE_SHADER_TIME_ADD, fs_reg(), payload, offset, value); } void @@ -864,65 +671,16 @@ fs_visitor::fail(const char *format, ...) * During a SIMD16 compile (if one happens anyway), this just calls fail(). */ void -fs_visitor::no16(const char *format, ...) +fs_visitor::no16(const char *msg) { - va_list va; - - va_start(va, format); - if (dispatch_width == 16) { - vfail(format, va); + fail("%s", msg); } else { simd16_unsupported = true; - if (brw->perf_debug) { - if (no16_msg) - ralloc_vasprintf_append(&no16_msg, format, va); - else - no16_msg = ralloc_vasprintf(mem_ctx, format, va); - } + compiler->shader_perf_log(log_data, + "SIMD16 shader failed to compile: %s", msg); } - - va_end(va); -} - -fs_inst * -fs_visitor::emit(enum opcode opcode) -{ - return emit(new(mem_ctx) fs_inst(opcode, dispatch_width)); -} - -fs_inst * -fs_visitor::emit(enum opcode opcode, const fs_reg &dst) -{ - return emit(new(mem_ctx) fs_inst(opcode, dst)); -} - -fs_inst * -fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0) -{ - return emit(new(mem_ctx) fs_inst(opcode, dst, src0)); -} - -fs_inst * -fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, - const fs_reg &src1) -{ - return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1)); -} - -fs_inst * -fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, - const fs_reg &src1, const fs_reg &src2) -{ - return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1, src2)); -} - -fs_inst * -fs_visitor::emit(enum opcode opcode, const fs_reg &dst, - fs_reg src[], int sources) -{ - return emit(new(mem_ctx) fs_inst(opcode, dst, src, sources)); } /** @@ -1051,7 +809,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD: return inst->mlen; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: - return 2; + return inst->mlen; case SHADER_OPCODE_UNTYPED_ATOMIC: case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: @@ -1077,14 +835,6 @@ fs_visitor::vgrf(const glsl_type *const type) brw_type_for_base_type(type), dispatch_width); } -fs_reg -fs_visitor::vgrf(int num_components) -{ - int 
reg_width = dispatch_width / 8; - return fs_reg(GRF, alloc.allocate(num_components * reg_width), - BRW_REGISTER_TYPE_F, dispatch_width); -} - /** Fixed HW reg constructor. */ fs_reg::fs_reg(enum register_file file, int reg) { @@ -1130,117 +880,18 @@ fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type, this->width = width; } -fs_reg * -fs_visitor::variable_storage(ir_variable *var) -{ - return (fs_reg *)hash_table_find(this->variable_ht, var); -} - -void -import_uniforms_callback(const void *key, - void *data, - void *closure) -{ - struct hash_table *dst_ht = (struct hash_table *)closure; - const fs_reg *reg = (const fs_reg *)data; - - if (reg->file != UNIFORM) - return; - - hash_table_insert(dst_ht, data, key); -} - /* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch. * This brings in those uniform definitions */ void fs_visitor::import_uniforms(fs_visitor *v) { - hash_table_call_foreach(v->variable_ht, - import_uniforms_callback, - variable_ht); this->push_constant_loc = v->push_constant_loc; this->pull_constant_loc = v->pull_constant_loc; this->uniforms = v->uniforms; this->param_size = v->param_size; } -/* Our support for uniforms is piggy-backed on the struct - * gl_fragment_program, because that's where the values actually - * get stored, rather than in some global gl_shader_program uniform - * store. - */ -void -fs_visitor::setup_uniform_values(ir_variable *ir) -{ - int namelen = strlen(ir->name); - - /* The data for our (non-builtin) uniforms is stored in a series of - * gl_uniform_driver_storage structs for each subcomponent that - * glGetUniformLocation() could name. We know it's been set up in the same - * order we'd walk the type, so walk the list of storage and find anything - * with our name, or the prefix of a component that starts with our name. - */ - unsigned params_before = uniforms; - for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) { - struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; - - if (strncmp(ir->name, storage->name, namelen) != 0 || - (storage->name[namelen] != 0 && - storage->name[namelen] != '.' && - storage->name[namelen] != '[')) { - continue; - } - - unsigned slots = storage->type->component_slots(); - if (storage->array_elements) - slots *= storage->array_elements; - - for (unsigned i = 0; i < slots; i++) { - stage_prog_data->param[uniforms++] = &storage->storage[i]; - } - } - - /* Make sure we actually initialized the right amount of stuff here. */ - assert(params_before + ir->type->component_slots() == uniforms); - (void)params_before; -} - - -/* Our support for builtin uniforms is even scarier than non-builtin. - * It sits on top of the PROG_STATE_VAR parameters that are - * automatically updated from GL context state. - */ -void -fs_visitor::setup_builtin_uniform_values(ir_variable *ir) -{ - const ir_state_slot *const slots = ir->get_state_slots(); - assert(slots != NULL); - - for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) { - /* This state reference has already been setup by ir_to_mesa, but we'll - * get the same index back here. - */ - int index = _mesa_add_state_reference(this->prog->Parameters, - (gl_state_index *)slots[i].tokens); - - /* Add each of the unique swizzles of the element as a parameter. - * This'll end up matching the expected layout of the - * array/matrix/structure we're trying to fill in. 
- */ - int last_swiz = -1; - for (unsigned int j = 0; j < 4; j++) { - int swiz = GET_SWZ(slots[i].swizzle, j); - if (swiz == last_swiz) - break; - last_swiz = swiz; - - stage_prog_data->param[uniforms++] = - &prog->Parameters->ParameterValues[index][swiz]; - } - } -} - fs_reg * fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, bool origin_upper_left) @@ -1253,15 +904,15 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, /* gl_FragCoord.x */ if (pixel_center_integer) { - emit(MOV(wpos, this->pixel_x)); + bld.MOV(wpos, this->pixel_x); } else { - emit(ADD(wpos, this->pixel_x, fs_reg(0.5f))); + bld.ADD(wpos, this->pixel_x, fs_reg(0.5f)); } wpos = offset(wpos, 1); /* gl_FragCoord.y */ if (!flip && pixel_center_integer) { - emit(MOV(wpos, this->pixel_y)); + bld.MOV(wpos, this->pixel_y); } else { fs_reg pixel_y = this->pixel_y; float offset = (pixel_center_integer ? 0.0 : 0.5); @@ -1271,22 +922,22 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, offset += key->drawable_height - 1.0; } - emit(ADD(wpos, pixel_y, fs_reg(offset))); + bld.ADD(wpos, pixel_y, fs_reg(offset)); } wpos = offset(wpos, 1); /* gl_FragCoord.z */ if (devinfo->gen >= 6) { - emit(MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)))); + bld.MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))); } else { - emit(FS_OPCODE_LINTERP, wpos, + bld.emit(FS_OPCODE_LINTERP, wpos, this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], interp_reg(VARYING_SLOT_POS, 2)); } wpos = offset(wpos, 1); /* gl_FragCoord.w: Already set up in emit_interpolation */ - emit(BRW_OPCODE_MOV, wpos, this->wpos_w); + bld.MOV(wpos, this->wpos_w); return reg; } @@ -1321,8 +972,8 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp, */ barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC; } - return emit(FS_OPCODE_LINTERP, attr, - this->delta_xy[barycoord_mode], interp); + return bld.emit(FS_OPCODE_LINTERP, attr, + this->delta_xy[barycoord_mode], interp); } void @@ -1380,7 +1031,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name, struct brw_reg interp = interp_reg(location, k); interp = suboffset(interp, 3); interp.type = attr.type; - emit(FS_OPCODE_CINTERP, attr, fs_reg(interp)); + bld.emit(FS_OPCODE_CINTERP, attr, fs_reg(interp)); attr = offset(attr, 1); } } else { @@ -1393,7 +1044,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name, * unlit, replace the centroid data with non-centroid * data. */ - emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); + bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); fs_inst *inst; inst = emit_linterp(attr, fs_reg(interp), interpolation_mode, @@ -1417,7 +1068,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name, mod_sample || key->persample_shading); } if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) { - emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w); + bld.MUL(attr, attr, this->pixel_w); } attr = offset(attr, 1); } @@ -1448,7 +1099,7 @@ fs_visitor::emit_frontfacing_interpolation() fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W)); g0.negate = true; - emit(ASR(*reg, g0, fs_reg(15))); + bld.ASR(*reg, g0, fs_reg(15)); } else { /* Bit 31 of g1.6 is 0 if the polygon is front facing. We want to create * a boolean result from this (1/true or 0/false). 
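Most of the mechanical churn in this file follows one pattern: rather than emitting an instruction and then patching flags on the returned fs_inst, the flags are configured on a derived builder up front. A before/after sketch with illustrative operands, not taken from any particular hunk:

    /* Old style: emit, then fix up the instruction by hand. */
    fs_inst *mov = emit(MOV(dst, src));
    mov->force_writemask_all = true;
    mov->force_sechalf = true;

    /* New style: derive a builder carrying the same state, then emit. */
    bld.exec_all().half(1).MOV(dst, src);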
@@ -1463,7 +1114,7 @@ fs_visitor::emit_frontfacing_interpolation() fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D)); g1_6.negate = true; - emit(ASR(*reg, g1_6, fs_reg(31))); + bld.ASR(*reg, g1_6, fs_reg(31)); } return reg; @@ -1478,9 +1129,9 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos) if (key->compute_pos_offset) { /* Convert int_sample_pos to floating point */ - emit(MOV(dst, int_sample_pos)); + bld.MOV(dst, int_sample_pos); /* Scale to the range [0, 1] */ - emit(MUL(dst, dst, fs_reg(1 / 16.0f))); + bld.MUL(dst, dst, fs_reg(1 / 16.0f)); } else { /* From ARB_sample_shading specification: @@ -1488,7 +1139,7 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos) * rasterization is disabled, gl_SamplePosition will always be * (0.5, 0.5). */ - emit(MOV(dst, fs_reg(0.5f))); + bld.MOV(dst, fs_reg(0.5f)); } } @@ -1497,7 +1148,7 @@ fs_visitor::emit_samplepos_setup() { assert(devinfo->gen >= 6); - this->current_annotation = "compute sample position"; + const fs_builder abld = bld.annotate("compute sample position"); fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::vec2_type)); fs_reg pos = *reg; fs_reg int_sample_x = vgrf(glsl_type::int_type); @@ -1519,22 +1170,22 @@ fs_visitor::emit_samplepos_setup() BRW_REGISTER_TYPE_B), 16, 8, 2); if (dispatch_width == 8) { - emit(MOV(int_sample_x, fs_reg(sample_pos_reg))); + abld.MOV(int_sample_x, fs_reg(sample_pos_reg)); } else { - emit(MOV(half(int_sample_x, 0), fs_reg(sample_pos_reg))); - emit(MOV(half(int_sample_x, 1), fs_reg(suboffset(sample_pos_reg, 16)))) - ->force_sechalf = true; + abld.half(0).MOV(half(int_sample_x, 0), fs_reg(sample_pos_reg)); + abld.half(1).MOV(half(int_sample_x, 1), + fs_reg(suboffset(sample_pos_reg, 16))); } /* Compute gl_SamplePosition.x */ compute_sample_position(pos, int_sample_x); pos = offset(pos, 1); if (dispatch_width == 8) { - emit(MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1)))); + abld.MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1))); } else { - emit(MOV(half(int_sample_y, 0), - fs_reg(suboffset(sample_pos_reg, 1)))); - emit(MOV(half(int_sample_y, 1), fs_reg(suboffset(sample_pos_reg, 17)))) - ->force_sechalf = true; + abld.half(0).MOV(half(int_sample_y, 0), + fs_reg(suboffset(sample_pos_reg, 1))); + abld.half(1).MOV(half(int_sample_y, 1), + fs_reg(suboffset(sample_pos_reg, 17))); } /* Compute gl_SamplePosition.y */ compute_sample_position(pos, int_sample_y); @@ -1548,7 +1199,7 @@ fs_visitor::emit_sampleid_setup() brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; assert(devinfo->gen >= 6); - this->current_annotation = "compute sample id"; + const fs_builder abld = bld.annotate("compute sample id"); fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type)); if (key->compute_sample_id) { @@ -1575,26 +1226,25 @@ fs_visitor::emit_sampleid_setup() * are sample 1 of subspan 0; the third group is sample 0 of * subspan 1, and finally sample 1 of subspan 1. */ - fs_inst *inst; - inst = emit(BRW_OPCODE_AND, t1, - fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)), - fs_reg(0xc0)); - inst->force_writemask_all = true; - inst = emit(BRW_OPCODE_SHR, t1, t1, fs_reg(5)); - inst->force_writemask_all = true; + abld.exec_all() + .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)), + fs_reg(0xc0)); + abld.exec_all().SHR(t1, t1, fs_reg(5)); + /* This works for both SIMD8 and SIMD16 */ - inst = emit(MOV(t2, brw_imm_v(key->persample_2x ? 
0x1010 : 0x3210))); - inst->force_writemask_all = true; + abld.exec_all() + .MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210)); + /* This special instruction takes care of setting vstride=1, * width=4, hstride=0 of t2 during an ADD instruction. */ - emit(FS_OPCODE_SET_SAMPLE_ID, *reg, t1, t2); + abld.emit(FS_OPCODE_SET_SAMPLE_ID, *reg, t1, t2); } else { /* As per GL_ARB_sample_shading specification: * "When rendering to a non-multisample buffer, or if multisample * rasterization is disabled, gl_SampleID will always be zero." */ - emit(BRW_OPCODE_MOV, *reg, fs_reg(0)); + abld.MOV(*reg, fs_reg(0)); } return reg; @@ -1606,111 +1256,11 @@ fs_visitor::resolve_source_modifiers(fs_reg *src) if (!src->abs && !src->negate) return; - fs_reg temp = retype(vgrf(1), src->type); - emit(MOV(temp, *src)); + fs_reg temp = bld.vgrf(src->type); + bld.MOV(temp, *src); *src = temp; } -fs_reg -fs_visitor::fix_math_operand(fs_reg src) -{ - /* Can't do hstride == 0 args on gen6 math, so expand it out. We - * might be able to do better by doing execsize = 1 math and then - * expanding that result out, but we would need to be careful with - * masking. - * - * The hardware ignores source modifiers (negate and abs) on math - * instructions, so we also move to a temp to set those up. - */ - if (devinfo->gen == 6 && src.file != UNIFORM && src.file != IMM && - !src.abs && !src.negate) - return src; - - /* Gen7 relaxes most of the above restrictions, but still can't use IMM - * operands to math - */ - if (devinfo->gen >= 7 && src.file != IMM) - return src; - - fs_reg expanded = vgrf(glsl_type::float_type); - expanded.type = src.type; - emit(BRW_OPCODE_MOV, expanded, src); - return expanded; -} - -fs_inst * -fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src) -{ - switch (opcode) { - case SHADER_OPCODE_RCP: - case SHADER_OPCODE_RSQ: - case SHADER_OPCODE_SQRT: - case SHADER_OPCODE_EXP2: - case SHADER_OPCODE_LOG2: - case SHADER_OPCODE_SIN: - case SHADER_OPCODE_COS: - break; - default: - unreachable("not reached: bad math opcode"); - } - - /* Can't do hstride == 0 args to gen6 math, so expand it out. We - * might be able to do better by doing execsize = 1 math and then - * expanding that result out, but we would need to be careful with - * masking. - * - * Gen 6 hardware ignores source modifiers (negate and abs) on math - * instructions, so we also move to a temp to set those up. - */ - if (devinfo->gen == 6 || devinfo->gen == 7) - src = fix_math_operand(src); - - fs_inst *inst = emit(opcode, dst, src); - - if (devinfo->gen < 6) { - inst->base_mrf = 2; - inst->mlen = dispatch_width / 8; - } - - return inst; -} - -fs_inst * -fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) -{ - int base_mrf = 2; - fs_inst *inst; - - if (devinfo->gen >= 8) { - inst = emit(opcode, dst, src0, src1); - } else if (devinfo->gen >= 6) { - src0 = fix_math_operand(src0); - src1 = fix_math_operand(src1); - - inst = emit(opcode, dst, src0, src1); - } else { - /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13 - * "Message Payload": - * - * "Operand0[7]. For the INT DIV functions, this operand is the - * denominator." - * ... - * "Operand1[7]. For the INT DIV functions, this operand is the - * numerator." - */ - bool is_int_div = opcode != SHADER_OPCODE_POW; - fs_reg &op0 = is_int_div ? src1 : src0; - fs_reg &op1 = is_int_div ? 
src0 : src1; - - emit(MOV(fs_reg(MRF, base_mrf + 1, op1.type, dispatch_width), op1)); - inst = emit(opcode, dst, op0, reg_null_f); - - inst->base_mrf = base_mrf; - inst->mlen = 2 * dispatch_width / 8; - } - return inst; -} - void fs_visitor::emit_discard_jump() { @@ -1719,7 +1269,7 @@ fs_visitor::emit_discard_jump() /* For performance, after a discard, jump to the end of the * shader if all relevant channels have been discarded. */ - fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP); + fs_inst *discard_jump = bld.emit(FS_OPCODE_DISCARD_JUMP); discard_jump->flag_subreg = 1; discard_jump->predicate = (dispatch_width == 8) @@ -2317,26 +1867,22 @@ fs_visitor::demote_pull_constants() continue; /* Set up the annotation tracking for new generated instructions. */ - base_ir = inst->ir; - current_annotation = inst->annotation; - + const fs_builder ibld = bld.annotate(inst->annotation, inst->ir) + .at(block, inst); fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start); fs_reg dst = vgrf(glsl_type::float_type); /* Generate a pull load into dst. */ if (inst->src[i].reladdr) { - exec_list list = VARYING_PULL_CONSTANT_LOAD(dst, - surf_index, - *inst->src[i].reladdr, - pull_index); - inst->insert_before(block, &list); + VARYING_PULL_CONSTANT_LOAD(ibld, dst, + surf_index, + *inst->src[i].reladdr, + pull_index); inst->src[i].reladdr = NULL; } else { fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15); - fs_inst *pull = - new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8, - dst, surf_index, offset); - inst->insert_before(block, pull); + ibld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + dst, surf_index, offset); inst->src[i].set_smear(pull_index & 3); } @@ -2663,6 +2209,16 @@ fs_visitor::opt_sampler_eot() if (unlikely(tex_inst->is_head_sentinel()) || !tex_inst->is_tex()) return false; + /* This optimisation doesn't seem to work for textureGather for some + * reason. I can't find any documentation or known workarounds to indicate + * that this is expected, but considering that it is probably pretty + * unlikely that a shader would directly write out the results from + * textureGather we might as well just disable it. + */ + if (tex_inst->opcode == SHADER_OPCODE_TG4 || + tex_inst->opcode == SHADER_OPCODE_TG4_OFFSET) + return false; + /* If there's no header present, we need to munge the LOAD_PAYLOAD as well. * It's very likely to be the previous instruction. */ @@ -2676,7 +2232,7 @@ fs_visitor::opt_sampler_eot() tex_inst->offset |= fb_write->target << 24; tex_inst->eot = true; - tex_inst->dst = reg_null_ud; + tex_inst->dst = bld.null_reg_ud(); fb_write->remove(cfg->blocks[cfg->num_blocks - 1]); /* If a header is present, marking the eot is sufficient. 
Otherwise, we need @@ -2688,7 +2244,8 @@ fs_visitor::opt_sampler_eot() if (tex_inst->header_size != 0) return true; - fs_reg send_header = vgrf(load_payload->sources + 1); + fs_reg send_header = bld.vgrf(BRW_REGISTER_TYPE_F, + load_payload->sources + 1); fs_reg *new_sources = ralloc_array(mem_ctx, fs_reg, load_payload->sources + 1); @@ -3041,8 +2598,8 @@ fs_visitor::emit_repclear_shader() fs_inst *mov; if (uniforms == 1) { - mov = emit(MOV(vec4(brw_message_reg(color_mrf)), - fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F))); + mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)), + fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)); } else { struct brw_reg reg = brw_reg(BRW_GENERAL_REGISTER_FILE, @@ -3051,14 +2608,13 @@ fs_visitor::emit_repclear_shader() BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); - mov = emit(MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg))); + mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)), + fs_reg(reg)); } - mov->force_writemask_all = true; - fs_inst *write; if (key->nr_color_regions == 1) { - write = emit(FS_OPCODE_REP_FB_WRITE); + write = bld.emit(FS_OPCODE_REP_FB_WRITE); write->saturate = key->clamp_fragment_color; write->base_mrf = color_mrf; write->target = 0; @@ -3067,7 +2623,7 @@ fs_visitor::emit_repclear_shader() } else { assume(key->nr_color_regions > 0); for (int i = 0; i < key->nr_color_regions; ++i) { - write = emit(FS_OPCODE_REP_FB_WRITE); + write = bld.emit(FS_OPCODE_REP_FB_WRITE); write->saturate = key->clamp_fragment_color; write->base_mrf = base_mrf; write->target = i; @@ -3223,9 +2779,8 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block, */ if (block->start() == scan_inst) { for (int i = 0; i < write_len; i++) { - if (needs_dep[i]) { - inst->insert_before(block, DEP_RESOLVE_MOV(first_write_grf + i)); - } + if (needs_dep[i]) + DEP_RESOLVE_MOV(bld.at(block, inst), first_write_grf + i); } return; } @@ -3241,7 +2796,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block, if (reg >= first_write_grf && reg < first_write_grf + write_len && needs_dep[reg - first_write_grf]) { - inst->insert_before(block, DEP_RESOLVE_MOV(reg)); + DEP_RESOLVE_MOV(bld.at(block, inst), reg); needs_dep[reg - first_write_grf] = false; if (scan_inst->exec_size == 16) needs_dep[reg - first_write_grf + 1] = false; @@ -3288,8 +2843,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins if (block->end() == scan_inst) { for (int i = 0; i < write_len; i++) { if (needs_dep[i]) - scan_inst->insert_before(block, - DEP_RESOLVE_MOV(first_write_grf + i)); + DEP_RESOLVE_MOV(bld.at(block, scan_inst), first_write_grf + i); } return; } @@ -3304,7 +2858,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins scan_inst->dst.reg >= first_write_grf && scan_inst->dst.reg < first_write_grf + write_len && needs_dep[scan_inst->dst.reg - first_write_grf]) { - scan_inst->insert_before(block, DEP_RESOLVE_MOV(scan_inst->dst.reg)); + DEP_RESOLVE_MOV(bld.at(block, scan_inst), scan_inst->dst.reg); needs_dep[scan_inst->dst.reg - first_write_grf] = false; } @@ -3429,6 +2983,9 @@ fs_visitor::lower_load_payload() assert(inst->dst.file == MRF || inst->dst.file == GRF); assert(inst->saturate == false); + const fs_builder ibld = bld.group(inst->exec_size, inst->force_sechalf) + .exec_all(inst->force_writemask_all) + .at(block, inst); fs_reg dst = inst->dst; /* Get rid of COMPR4. 
We'll add it back in if we need it */ @@ -3441,9 +2998,7 @@ fs_visitor::lower_load_payload() fs_reg mov_dst = retype(dst, BRW_REGISTER_TYPE_UD); fs_reg mov_src = retype(inst->src[i], BRW_REGISTER_TYPE_UD); mov_src.width = 8; - fs_inst *mov = MOV(mov_dst, mov_src); - mov->force_writemask_all = true; - inst->insert_before(block, mov); + ibld.exec_all().MOV(mov_dst, mov_src); } dst = offset(dst, 1); } @@ -3474,23 +3029,13 @@ fs_visitor::lower_load_payload() if (devinfo->has_compr4) { fs_reg compr4_dst = retype(dst, inst->src[i].type); compr4_dst.reg |= BRW_MRF_COMPR4; - - fs_inst *mov = MOV(compr4_dst, inst->src[i]); - mov->force_writemask_all = inst->force_writemask_all; - inst->insert_before(block, mov); + ibld.MOV(compr4_dst, inst->src[i]); } else { /* Platform doesn't have COMPR4. We have to fake it */ fs_reg mov_dst = retype(dst, inst->src[i].type); mov_dst.width = 8; - - fs_inst *mov = MOV(mov_dst, half(inst->src[i], 0)); - mov->force_writemask_all = inst->force_writemask_all; - inst->insert_before(block, mov); - - mov = MOV(offset(mov_dst, 4), half(inst->src[i], 1)); - mov->force_writemask_all = inst->force_writemask_all; - mov->force_sechalf = true; - inst->insert_before(block, mov); + ibld.half(0).MOV(mov_dst, half(inst->src[i], 0)); + ibld.half(1).MOV(offset(mov_dst, 4), half(inst->src[i], 1)); } } @@ -3513,12 +3058,8 @@ fs_visitor::lower_load_payload() } for (uint8_t i = inst->header_size; i < inst->sources; i++) { - if (inst->src[i].file != BAD_FILE) { - fs_inst *mov = MOV(retype(dst, inst->src[i].type), - inst->src[i]); - mov->force_writemask_all = inst->force_writemask_all; - inst->insert_before(block, mov); - } + if (inst->src[i].file != BAD_FILE) + ibld.MOV(retype(dst, inst->src[i].type), inst->src[i]); dst = offset(dst, 1); } @@ -3532,6 +3073,172 @@ fs_visitor::lower_load_payload() return progress; } +bool +fs_visitor::lower_integer_multiplication() +{ + bool progress = false; + + /* Gen8's MUL instruction can do a 32-bit x 32-bit -> 32-bit operation + * directly, but Cherryview cannot. + */ + if (devinfo->gen >= 8 && !devinfo->is_cherryview) + return false; + + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { + if (inst->opcode != BRW_OPCODE_MUL || + inst->dst.is_accumulator() || + (inst->dst.type != BRW_REGISTER_TYPE_D && + inst->dst.type != BRW_REGISTER_TYPE_UD)) + continue; + + const fs_builder ibld = bld.at(block, inst); + + /* The MUL instruction isn't commutative. On Gen <= 6, only the low + * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of + * src1 are used. + * + * If multiplying by an immediate value that fits in 16-bits, do a + * single MUL instruction with that value in the proper location. + */ + if (inst->src[1].file == IMM && + inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) { + if (devinfo->gen < 7) { + fs_reg imm(GRF, alloc.allocate(dispatch_width / 8), + inst->dst.type, dispatch_width); + ibld.MOV(imm, inst->src[1]); + ibld.MUL(inst->dst, imm, inst->src[0]); + } else { + ibld.MUL(inst->dst, inst->src[0], inst->src[1]); + } + } else { + /* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot + * do 32-bit integer multiplication in one instruction, but instead + * must do a sequence (which actually calculates a 64-bit result): + * + * mul(8) acc0<1>D g3<8,8,1>D g4<8,8,1>D + * mach(8) null g3<8,8,1>D g4<8,8,1>D + * mov(8) g2<1>D acc0<8,8,1>D + * + * But on Gen > 6, the ability to use second accumulator register + * (acc1) for non-float data types was removed, preventing a simple + * implementation in SIMD16. 
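The decomposition described in the rest of this comment is ordinary split multiplication on the low 32 bits; a scalar sketch of the same arithmetic, mirroring the Gen7+ path where src1 is the operand that gets split (hypothetical helper, not part of the patch):

    /* result == (a * b) mod 2^32 using two 32x16 partial products.  Only the
     * low 16 bits of `hi` can reach the final 32-bit result, which is why the
     * real code adds them directly into the high half of `lo` with strided
     * regioning instead of emitting a shift. */
    static inline uint32_t
    mul32_low(uint32_t a, uint32_t b)
    {
       uint32_t lo = a * (b & 0xffff);   /* MUL low,  src0, src1.lo16 */
       uint32_t hi = a * (b >> 16);      /* MUL high, src0, src1.hi16 */
       return lo + (hi << 16);
    }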
A 16-channel result can be calculated by + * executing the three instructions twice in SIMD8, once with quarter + * control of 1Q for the first eight channels and again with 2Q for + * the second eight channels. + * + * Which accumulator register is implicitly accessed (by AccWrEnable + * for instance) is determined by the quarter control. Unfortunately + * Ivybridge (and presumably Baytrail) has a hardware bug in which an + * implicit accumulator access by an instruction with 2Q will access + * acc1 regardless of whether the data type is usable in acc1. + * + * Specifically, the 2Q mach(8) writes acc1 which does not exist for + * integer data types. + * + * Since we only want the low 32-bits of the result, we can do two + * 32-bit x 16-bit multiplies (like the mul and mach are doing), and + * adjust the high result and add them (like the mach is doing): + * + * mul(8) g7<1>D g3<8,8,1>D g4.0<8,8,1>UW + * mul(8) g8<1>D g3<8,8,1>D g4.1<8,8,1>UW + * shl(8) g9<1>D g8<8,8,1>D 16D + * add(8) g2<1>D g7<8,8,1>D g8<8,8,1>D + * + * We avoid the shl instruction by realizing that we only want to add + * the low 16-bits of the "high" result to the high 16-bits of the + * "low" result and using proper regioning on the add: + * + * mul(8) g7<1>D g3<8,8,1>D g4.0<16,8,2>UW + * mul(8) g8<1>D g3<8,8,1>D g4.1<16,8,2>UW + * add(8) g7.1<2>UW g7.1<16,8,2>UW g8<16,8,2>UW + * + * Since it does not use the (single) accumulator register, we can + * schedule multi-component multiplications much better. + */ + + if (inst->conditional_mod && inst->dst.is_null()) { + inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8), + inst->dst.type, dispatch_width); + } + fs_reg low = inst->dst; + fs_reg high(GRF, alloc.allocate(dispatch_width / 8), + inst->dst.type, dispatch_width); + + if (devinfo->gen >= 7) { + fs_reg src1_0_w = inst->src[1]; + fs_reg src1_1_w = inst->src[1]; + + if (inst->src[1].file == IMM) { + src1_0_w.fixed_hw_reg.dw1.ud &= 0xffff; + src1_1_w.fixed_hw_reg.dw1.ud >>= 16; + } else { + src1_0_w.type = BRW_REGISTER_TYPE_UW; + if (src1_0_w.stride != 0) { + assert(src1_0_w.stride == 1); + src1_0_w.stride = 2; + } + + src1_1_w.type = BRW_REGISTER_TYPE_UW; + if (src1_1_w.stride != 0) { + assert(src1_1_w.stride == 1); + src1_1_w.stride = 2; + } + src1_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW); + } + ibld.MUL(low, inst->src[0], src1_0_w); + ibld.MUL(high, inst->src[0], src1_1_w); + } else { + fs_reg src0_0_w = inst->src[0]; + fs_reg src0_1_w = inst->src[0]; + + src0_0_w.type = BRW_REGISTER_TYPE_UW; + if (src0_0_w.stride != 0) { + assert(src0_0_w.stride == 1); + src0_0_w.stride = 2; + } + + src0_1_w.type = BRW_REGISTER_TYPE_UW; + if (src0_1_w.stride != 0) { + assert(src0_1_w.stride == 1); + src0_1_w.stride = 2; + } + src0_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW); + + ibld.MUL(low, src0_0_w, inst->src[1]); + ibld.MUL(high, src0_1_w, inst->src[1]); + } + + fs_reg dst = inst->dst; + dst.type = BRW_REGISTER_TYPE_UW; + dst.subreg_offset = 2; + dst.stride = 2; + + high.type = BRW_REGISTER_TYPE_UW; + high.stride = 2; + + low.type = BRW_REGISTER_TYPE_UW; + low.subreg_offset = 2; + low.stride = 2; + + ibld.ADD(dst, low, high); + + if (inst->conditional_mod) { + fs_reg null(retype(ibld.null_reg_f(), inst->dst.type)); + set_condmod(inst->conditional_mod, + ibld.MOV(null, inst->dst)); + } + } + + inst->remove(block); + progress = true; + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + void fs_visitor::dump_instructions() { @@ -3602,6 +3309,9 @@ 
fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) } fprintf(file, "(%d) ", inst->exec_size); + if (inst->mlen) { + fprintf(file, "(mlen: %d) ", inst->mlen); + } switch (inst->dst.file) { case GRF: @@ -3895,7 +3605,7 @@ fs_visitor::setup_vs_payload() void fs_visitor::setup_cs_payload() { - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); payload.num_regs = 1; } @@ -3938,6 +3648,17 @@ fs_visitor::calculate_register_pressure() void fs_visitor::optimize() { + /* bld is the common builder object pointing at the end of the program we + * used to translate it into i965 IR. For the optimization and lowering + * passes coming next, any code added after the end of the program without + * having explicitly called fs_builder::at() clearly points at a mistake. + * Ideally optimization passes wouldn't be part of the visitor so they + * wouldn't have access to bld at all, but they do, so just in case some + * pass forgets to ask for a location explicitly set it to NULL here to + * make it trip. + */ + bld = bld.at(NULL, NULL); + split_virtual_grfs(); move_uniform_array_access_to_pull_constants(); @@ -3953,7 +3674,7 @@ fs_visitor::optimize() snprintf(filename, 64, "%s%d-%04d-%02d-%02d-" #pass, \ stage_abbrev, dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \ \ - backend_visitor::dump_instructions(filename); \ + backend_shader::dump_instructions(filename); \ } \ \ progress = progress || this_progress; \ @@ -3966,7 +3687,7 @@ fs_visitor::optimize() stage_abbrev, dispatch_width, shader_prog ? shader_prog->Name : 0); - backend_visitor::dump_instructions(filename); + backend_shader::dump_instructions(filename); } bool progress; @@ -4010,6 +3731,7 @@ fs_visitor::optimize() } OPT(opt_combine_constants); + OPT(lower_integer_multiplication); lower_uniform_pull_constant_loads(); } @@ -4066,9 +3788,11 @@ fs_visitor::allocate_registers() fail("Failure to register allocate. Reduce number of " "live scalar values to avoid this."); } else { - perf_debug("%s shader triggered register spilling. " - "Try reducing the number of live scalar values to " - "improve performance.\n", stage_name); + compiler->shader_perf_log(log_data, + "%s shader triggered register spilling. 
" + "Try reducing the number of live scalar " + "values to improve performance.\n", + stage_name); } /* Since we're out of heuristics, just go spill registers until we @@ -4097,7 +3821,7 @@ fs_visitor::allocate_registers() } bool -fs_visitor::run_vs() +fs_visitor::run_vs(gl_clip_plane *clip_planes) { assert(stage == MESA_SHADER_VERTEX); @@ -4105,26 +3829,17 @@ fs_visitor::run_vs() assign_common_binding_table_offsets(0); setup_vs_payload(); - if (INTEL_DEBUG & DEBUG_SHADER_TIME) + if (shader_time_index >= 0) emit_shader_time_begin(); - if (brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) { - emit_nir_code(); - } else { - foreach_in_list(ir_instruction, ir, shader->base.ir) { - base_ir = ir; - this->result = reg_undef; - ir->accept(this); - } - base_ir = NULL; - } + emit_nir_code(); if (failed) return false; - emit_urb_writes(); + emit_urb_writes(clip_planes); - if (INTEL_DEBUG & DEBUG_SHADER_TIME) + if (shader_time_index >= 0) emit_shader_time_end(); calculate_cfg(); @@ -4141,7 +3856,7 @@ fs_visitor::run_vs() } bool -fs_visitor::run_fs() +fs_visitor::run_fs(bool do_rep_send) { brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data; brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key; @@ -4160,10 +3875,11 @@ fs_visitor::run_fs() if (0) { emit_dummy_fs(); - } else if (brw->use_rep_send && dispatch_width == 16) { + } else if (do_rep_send) { + assert(dispatch_width == 16); emit_repclear_shader(); } else { - if (INTEL_DEBUG & DEBUG_SHADER_TIME) + if (shader_time_index >= 0) emit_shader_time_begin(); calculate_urb_setup(); @@ -4178,37 +3894,27 @@ fs_visitor::run_fs() * Initialize it with the dispatched pixels. */ if (wm_prog_data->uses_kill) { - fs_inst *discard_init = emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); + fs_inst *discard_init = bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); discard_init->flag_subreg = 1; } /* Generate FS IR for main(). (the visitor only descends into * functions called "main"). */ - if (brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) { - emit_nir_code(); - } else if (shader) { - foreach_in_list(ir_instruction, ir, shader->base.ir) { - base_ir = ir; - this->result = reg_undef; - ir->accept(this); - } - } else { - emit_fragment_program_code(); - } - base_ir = NULL; + emit_nir_code(); + if (failed) return false; if (wm_prog_data->uses_kill) - emit(FS_OPCODE_PLACEHOLDER_HALT); + bld.emit(FS_OPCODE_PLACEHOLDER_HALT); if (wm_key->alpha_test_func) emit_alpha_test(); emit_fb_writes(); - if (INTEL_DEBUG & DEBUG_SHADER_TIME) + if (shader_time_index >= 0) emit_shader_time_end(); calculate_cfg(); @@ -4252,7 +3958,7 @@ fs_visitor::run_cs() setup_cs_payload(); - if (INTEL_DEBUG & DEBUG_SHADER_TIME) + if (shader_time_index >= 0) emit_shader_time_begin(); emit_nir_code(); @@ -4262,7 +3968,7 @@ fs_visitor::run_cs() emit_cs_terminate(); - if (INTEL_DEBUG & DEBUG_SHADER_TIME) + if (shader_time_index >= 0) emit_shader_time_end(); calculate_cfg(); @@ -4312,10 +4018,18 @@ brw_wm_fs_emit(struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_WM)) brw_dump_ir("fragment", prog, &shader->base, &fp->Base); + int st_index8 = -1, st_index16 = -1; + if (INTEL_DEBUG & DEBUG_SHADER_TIME) { + st_index8 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS8); + st_index16 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS16); + } + /* Now the main event: Visit the shader IR and generate our FS IR for it. 
*/ - fs_visitor v(brw, mem_ctx, key, prog_data, prog, fp, 8); - if (!v.run_fs()) { + fs_visitor v(brw->intelScreen->compiler, brw, + mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base, + prog, &fp->Base, 8, st_index8); + if (!v.run_fs(false /* do_rep_send */)) { if (prog) { prog->LinkStatus = false; ralloc_strcat(&prog->InfoLog, v.fail_msg); @@ -4328,20 +4042,18 @@ brw_wm_fs_emit(struct brw_context *brw, } cfg_t *simd16_cfg = NULL; - fs_visitor v2(brw, mem_ctx, key, prog_data, prog, fp, 16); + fs_visitor v2(brw->intelScreen->compiler, brw, + mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base, + prog, &fp->Base, 16, st_index16); if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) { if (!v.simd16_unsupported) { /* Try a SIMD16 compile */ v2.import_uniforms(&v); - if (!v2.run_fs()) { - perf_debug("SIMD16 shader failed to compile, falling back to " - "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg); + if (!v2.run_fs(brw->use_rep_send)) { + perf_debug("SIMD16 shader failed to compile: %s", v2.fail_msg); } else { simd16_cfg = v2.cfg; } - } else { - perf_debug("SIMD16 shader unsupported, falling back to " - "SIMD8 at a 10-20%% performance cost: %s", v.no16_msg); } } @@ -4355,7 +4067,8 @@ brw_wm_fs_emit(struct brw_context *brw, prog_data->no_8 = false; } - fs_generator g(brw, mem_ctx, (void *) key, &prog_data->base, + fs_generator g(brw->intelScreen->compiler, brw, + mem_ctx, (void *) key, &prog_data->base, &fp->Base, v.promoted_constants, v.runtime_check_aads_emit, "FS"); if (unlikely(INTEL_DEBUG & DEBUG_WM)) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 1d7de2effbd..243baf688de 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -29,6 +29,7 @@ #include "brw_shader.h" #include "brw_ir_fs.h" +#include "brw_fs_builder.h" extern "C" { @@ -66,138 +67,44 @@ namespace brw { * * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR. 
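The header changes that follow collapse the three stage-specific fs_visitor constructors into a single one keyed on gl_shader_stage, and thread the shader-time index through as a parameter instead of consulting INTEL_DEBUG at emit time. The fragment-shader call above already shows the new shape; spelled out here only to make the signature concrete (arguments as in brw_wm_fs_emit):

    fs_visitor v(brw->intelScreen->compiler, brw,   /* compiler, log_data        */
                 mem_ctx, MESA_SHADER_FRAGMENT,     /* stage replaces the old    */
                 key, &prog_data->base,             /* per-stage constructors    */
                 prog, &fp->Base,
                 8, st_index8);                     /* dispatch width, time slot */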
*/ -class fs_visitor : public backend_visitor +class fs_visitor : public backend_shader { public: - const fs_reg reg_null_f; - const fs_reg reg_null_d; - const fs_reg reg_null_ud; - - fs_visitor(struct brw_context *brw, + fs_visitor(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, - const struct brw_wm_prog_key *key, - struct brw_wm_prog_data *prog_data, + gl_shader_stage stage, + const void *key, + struct brw_stage_prog_data *prog_data, struct gl_shader_program *shader_prog, - struct gl_fragment_program *fp, - unsigned dispatch_width); - - fs_visitor(struct brw_context *brw, - void *mem_ctx, - const struct brw_vs_prog_key *key, - struct brw_vs_prog_data *prog_data, - struct gl_shader_program *shader_prog, - struct gl_vertex_program *cp, - unsigned dispatch_width); - - fs_visitor(struct brw_context *brw, - void *mem_ctx, - const struct brw_cs_prog_key *key, - struct brw_cs_prog_data *prog_data, - struct gl_shader_program *shader_prog, - struct gl_compute_program *cp, - unsigned dispatch_width); + struct gl_program *prog, + unsigned dispatch_width, + int shader_time_index); ~fs_visitor(); - void init(); - fs_reg *variable_storage(ir_variable *var); fs_reg vgrf(const glsl_type *const type); - fs_reg vgrf(int num_components); void import_uniforms(fs_visitor *v); - void setup_uniform_clipplane_values(); - void compute_clip_distance(); - - void visit(ir_variable *ir); - void visit(ir_assignment *ir); - void visit(ir_dereference_variable *ir); - void visit(ir_dereference_record *ir); - void visit(ir_dereference_array *ir); - void visit(ir_expression *ir); - void visit(ir_texture *ir); - void visit(ir_if *ir); - void visit(ir_constant *ir); - void visit(ir_swizzle *ir); - void visit(ir_return *ir); - void visit(ir_loop *ir); - void visit(ir_loop_jump *ir); - void visit(ir_discard *ir); - void visit(ir_call *ir); - void visit(ir_function *ir); - void visit(ir_function_signature *ir); - void visit(ir_emit_vertex *); - void visit(ir_end_primitive *); + void setup_uniform_clipplane_values(gl_clip_plane *clip_planes); + void compute_clip_distance(gl_clip_plane *clip_planes); uint32_t gather_channel(int orig_chan, uint32_t sampler); void swizzle_result(ir_texture_opcode op, int dest_components, fs_reg orig_val, uint32_t sampler); - fs_inst *emit(fs_inst *inst); - void emit(exec_list list); - - fs_inst *emit(enum opcode opcode); - fs_inst *emit(enum opcode opcode, const fs_reg &dst); - fs_inst *emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0); - fs_inst *emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, - const fs_reg &src1); - fs_inst *emit(enum opcode opcode, const fs_reg &dst, - const fs_reg &src0, const fs_reg &src1, const fs_reg &src2); - fs_inst *emit(enum opcode opcode, const fs_reg &dst, - fs_reg src[], int sources); - - fs_inst *MOV(const fs_reg &dst, const fs_reg &src); - fs_inst *NOT(const fs_reg &dst, const fs_reg &src); - fs_inst *RNDD(const fs_reg &dst, const fs_reg &src); - fs_inst *RNDE(const fs_reg &dst, const fs_reg &src); - fs_inst *RNDZ(const fs_reg &dst, const fs_reg &src); - fs_inst *FRC(const fs_reg &dst, const fs_reg &src); - fs_inst *ADD(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *MUL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *MACH(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *MAC(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *SHL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *SHR(const fs_reg &dst, const 
fs_reg &src0, const fs_reg &src1); - fs_inst *ASR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *AND(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *OR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *XOR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *IF(enum brw_predicate predicate); - fs_inst *IF(const fs_reg &src0, const fs_reg &src1, - enum brw_conditional_mod condition); - fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1, - enum brw_conditional_mod condition); - fs_inst *LRP(const fs_reg &dst, const fs_reg &a, const fs_reg &y, - const fs_reg &x); - fs_inst *DEP_RESOLVE_MOV(int grf); - fs_inst *BFREV(const fs_reg &dst, const fs_reg &value); - fs_inst *BFE(const fs_reg &dst, const fs_reg &bits, const fs_reg &offset, - const fs_reg &value); - fs_inst *BFI1(const fs_reg &dst, const fs_reg &bits, const fs_reg &offset); - fs_inst *BFI2(const fs_reg &dst, const fs_reg &bfi1_dst, - const fs_reg &insert, const fs_reg &base); - fs_inst *FBH(const fs_reg &dst, const fs_reg &value); - fs_inst *FBL(const fs_reg &dst, const fs_reg &value); - fs_inst *CBIT(const fs_reg &dst, const fs_reg &value); - fs_inst *MAD(const fs_reg &dst, const fs_reg &c, const fs_reg &b, - const fs_reg &a); - fs_inst *ADDC(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *SUBB(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst *SEL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - int type_size(const struct glsl_type *type); fs_inst *get_instruction_generating_reg(fs_inst *start, fs_inst *end, const fs_reg ®); - fs_inst *LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources, - int header_size); - - exec_list VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst, - const fs_reg &surf_index, - const fs_reg &varying_offset, - uint32_t const_offset); + void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld, + const fs_reg &dst, + const fs_reg &surf_index, + const fs_reg &varying_offset, + uint32_t const_offset); + void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf); - bool run_fs(); - bool run_vs(); + bool run_fs(bool do_rep_send); + bool run_vs(gl_clip_plane *clip_planes); bool run_cs(); void optimize(); void allocate_registers(); @@ -213,11 +120,8 @@ public: void assign_vs_urb_setup(); bool assign_regs(bool allow_spilling); void assign_regs_trivial(); - void get_used_mrfs(bool *mrf_used); void setup_payload_interference(struct ra_graph *g, int payload_reg_count, int first_payload_node); - void setup_mrf_hack_interference(struct ra_graph *g, - int first_mrf_hack_node); int choose_spill_reg(struct ra_graph *g); void spill_reg(int spill_reg); void split_virtual_grfs(); @@ -254,9 +158,10 @@ public: fs_inst *inst); void vfail(const char *msg, va_list args); void fail(const char *msg, ...); - void no16(const char *msg, ...); + void no16(const char *msg); void lower_uniform_pull_constant_loads(); bool lower_load_payload(); + bool lower_integer_multiplication(); bool opt_combine_constants(); void emit_dummy_fs(); @@ -318,58 +223,18 @@ public: fs_reg emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler); void emit_gen6_gather_wa(uint8_t wa, fs_reg dst); void resolve_source_modifiers(fs_reg *src); - fs_reg fix_math_operand(fs_reg src); - fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); - fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); - fs_inst *emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y, - const fs_reg &a); - void 
emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst, - const fs_reg &src0, const fs_reg &src1); void emit_discard_jump(); - /** Copy any live channel from \p src to the first channel of \p dst. */ - void emit_uniformize(const fs_reg &dst, const fs_reg &src); - bool try_emit_b2f_of_comparison(ir_expression *ir); - bool try_emit_saturate(ir_expression *ir); - bool try_emit_line(ir_expression *ir); - bool try_emit_mad(ir_expression *ir); bool try_replace_with_sel(); - bool try_opt_frontfacing_ternary(ir_if *ir); bool opt_peephole_sel(); bool opt_peephole_predicated_break(); bool opt_saturate_propagation(); bool opt_cmod_propagation(); bool opt_zero_samples(); - void emit_bool_to_cond_code(ir_rvalue *condition); - void emit_bool_to_cond_code_of_reg(ir_expression *expr, fs_reg op[3]); - void emit_if_gen6(ir_if *ir); void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg, uint32_t spill_offset, int count); void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg, uint32_t spill_offset, int count); - void emit_fragment_program_code(); - void setup_fp_regs(); - fs_reg get_fp_src_reg(const prog_src_register *src); - fs_reg get_fp_dst_reg(const prog_dst_register *dst); - void emit_fp_alu1(enum opcode opcode, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src); - void emit_fp_alu2(enum opcode opcode, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src0, fs_reg src1); - void emit_fp_scalar_write(const struct prog_instruction *fpi, - fs_reg dst, fs_reg src); - void emit_fp_scalar_math(enum opcode opcode, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src); - - void emit_fp_minmax(const struct prog_instruction *fpi, - fs_reg dst, fs_reg src0, fs_reg src1); - - void emit_fp_sop(enum brw_conditional_mod conditional_mod, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src0, fs_reg src1, fs_reg one); - void emit_nir_code(); void nir_setup_inputs(nir_shader *shader); void nir_setup_outputs(nir_shader *shader); @@ -383,13 +248,17 @@ public: void nir_emit_loop(nir_loop *loop); void nir_emit_block(nir_block *block); void nir_emit_instr(nir_instr *instr); - void nir_emit_alu(nir_alu_instr *instr); - void nir_emit_intrinsic(nir_intrinsic_instr *instr); - void nir_emit_texture(nir_tex_instr *instr); - void nir_emit_jump(nir_jump_instr *instr); + void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr); + void nir_emit_intrinsic(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); + void nir_emit_texture(const brw::fs_builder &bld, + nir_tex_instr *instr); + void nir_emit_jump(const brw::fs_builder &bld, + nir_jump_instr *instr); fs_reg get_nir_src(nir_src src); fs_reg get_nir_dest(nir_dest dest); - void emit_percomp(fs_inst *inst, unsigned wr_mask); + void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst, + unsigned wr_mask); bool optimize_frontfacing_ternary(nir_alu_instr *instr, const fs_reg &result); @@ -397,16 +266,21 @@ public: void setup_color_payload(fs_reg *dst, fs_reg color, unsigned components, unsigned exec_size, bool use_2nd_half); void emit_alpha_test(); - fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2, + fs_inst *emit_single_fb_write(const brw::fs_builder &bld, + fs_reg color1, fs_reg color2, fs_reg src0_alpha, unsigned components, unsigned exec_size, bool use_2nd_half = false); void emit_fb_writes(); - void emit_urb_writes(); + void emit_urb_writes(gl_clip_plane *clip_planes); void emit_cs_terminate(); + void emit_barrier(); + void emit_shader_time_begin(); void emit_shader_time_end(); - 
fs_inst *SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg value); + void SHADER_TIME_ADD(const brw::fs_builder &bld, + int shader_time_subindex, + fs_reg value); void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, fs_reg dst, fs_reg offset, fs_reg src0, @@ -415,23 +289,9 @@ public: void emit_untyped_surface_read(unsigned surf_index, fs_reg dst, fs_reg offset); - void emit_interpolate_expression(ir_expression *ir); - - bool try_rewrite_rhs_to_dst(ir_assignment *ir, - fs_reg dst, - fs_reg src, - fs_inst *pre_rhs_inst, - fs_inst *last_rhs_inst); - void emit_assignment_writes(fs_reg &l, fs_reg &r, - const glsl_type *type, bool predicated); - void resolve_ud_negate(fs_reg *reg); - void resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg); - - fs_reg get_timestamp(fs_inst **out_mov); + fs_reg get_timestamp(const brw::fs_builder &bld); struct brw_reg interp_reg(int location, int channel); - void setup_uniform_values(ir_variable *ir); - void setup_builtin_uniform_values(ir_variable *ir); int implied_mrf_writes(fs_inst *inst); virtual void dump_instructions(); @@ -439,8 +299,6 @@ public: void dump_instruction(backend_instruction *inst); void dump_instruction(backend_instruction *inst, FILE *file); - void visit_atomic_counter_intrinsic(ir_call *ir); - const void *const key; const struct brw_sampler_prog_key_data *key_tex; @@ -476,7 +334,6 @@ public: */ int *push_constant_loc; - struct hash_table *variable_ht; fs_reg frag_depth; fs_reg sample_mask; fs_reg outputs[VARYING_SLOT_MAX]; @@ -487,26 +344,18 @@ public: /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */ unsigned max_grf; - fs_reg *fp_temp_regs; - fs_reg *fp_input_regs; - fs_reg *nir_locals; fs_reg *nir_globals; fs_reg nir_inputs; fs_reg nir_outputs; fs_reg *nir_system_values; - /** @{ debug annotation info */ - const char *current_annotation; - const void *base_ir; - /** @} */ - bool failed; char *fail_msg; bool simd16_unsupported; char *no16_msg; - /* Result of last visit() method. */ + /* Result of last visit() method. Still used by emit_texture() */ fs_reg result; /** Register numbers for thread payload fields. 
*/ @@ -539,7 +388,10 @@ public: const unsigned dispatch_width; /**< 8 or 16 */ + int shader_time_index; + unsigned promoted_constants; + brw::fs_builder bld; }; /** @@ -550,7 +402,7 @@ public: class fs_generator { public: - fs_generator(struct brw_context *brw, + fs_generator(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const void *key, struct brw_stage_prog_data *prog_data, @@ -572,6 +424,7 @@ private: void generate_fb_write(fs_inst *inst, struct brw_reg payload); void generate_urb_write(fs_inst *inst, struct brw_reg payload); void generate_cs_terminate(fs_inst *inst, struct brw_reg payload); + void generate_barrier(fs_inst *inst, struct brw_reg src); void generate_blorp_fb_write(fs_inst *inst); void generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); @@ -644,7 +497,9 @@ private: bool patch_discard_jumps_to_fb_writes(); - struct brw_context *brw; + const struct brw_compiler *compiler; + void *log_data; /* Passed to compiler->*_log functions */ + const struct brw_device_info *devinfo; struct brw_codegen *p; diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h new file mode 100644 index 00000000000..58ac5980da5 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -0,0 +1,652 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BRW_FS_BUILDER_H +#define BRW_FS_BUILDER_H + +#include "brw_ir_fs.h" +#include "brw_shader.h" +#include "brw_context.h" + +namespace brw { + /** + * Toolbox to assemble an FS IR program out of individual instructions. + * + * This object is meant to have an interface consistent with + * brw::vec4_builder. They cannot be fully interchangeable because + * brw::fs_builder generates scalar code while brw::vec4_builder generates + * vector code. + */ + class fs_builder { + public: + /** Type used in this IR to represent a source of an instruction. */ + typedef fs_reg src_reg; + + /** Type used in this IR to represent the destination of an instruction. */ + typedef fs_reg dst_reg; + + /** Type used in this IR to represent an instruction. */ + typedef fs_inst instruction; + + /** + * Construct an fs_builder that inserts instructions into \p shader. + * \p dispatch_width gives the native execution width of the program. 
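A design point worth noting in the new builder: at(), group(), half(), exec_all() and annotate() each return a modified copy, so deriving a builder never disturbs the one it was derived from; this is also what lets optimize() park the shared bld at (NULL, NULL) to trap emissions that forgot to pick an insertion point. A small usage sketch with illustrative registers, not taken from the patch:

    /* Derive a local builder positioned before `inst`; `bld` is unchanged. */
    const fs_builder ibld = bld.at(block, inst).annotate("fixup").exec_all();
    ibld.MOV(tmp, src);   /* inserted before `inst`, writemask disabled        */
                          /* `bld` itself keeps its own cursor and flag state  */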
+ */ + fs_builder(backend_shader *shader, + unsigned dispatch_width) : + shader(shader), block(NULL), cursor(NULL), + _dispatch_width(dispatch_width), + _group(0), + force_writemask_all(false), + annotation() + { + } + + /** + * Construct an fs_builder that inserts instructions before \p cursor in + * basic block \p block, inheriting other code generation parameters + * from this. + */ + fs_builder + at(bblock_t *block, exec_node *cursor) const + { + fs_builder bld = *this; + bld.block = block; + bld.cursor = cursor; + return bld; + } + + /** + * Construct an fs_builder appending instructions at the end of the + * instruction list of the shader, inheriting other code generation + * parameters from this. + */ + fs_builder + at_end() const + { + return at(NULL, (exec_node *)&shader->instructions.tail); + } + + /** + * Construct a builder specifying the default SIMD width and group of + * channel enable signals, inheriting other code generation parameters + * from this. + * + * \p n gives the default SIMD width, \p i gives the slot group used for + * predication and control flow masking in multiples of \p n channels. + */ + fs_builder + group(unsigned n, unsigned i) const + { + assert(n <= dispatch_width() && + i < dispatch_width() / n); + fs_builder bld = *this; + bld._dispatch_width = n; + bld._group += i * n; + return bld; + } + + /** + * Alias for group() with width equal to eight. + */ + fs_builder + half(unsigned i) const + { + return group(8, i); + } + + /** + * Construct a builder with per-channel control flow execution masking + * disabled if \p b is true. If control flow execution masking is + * already disabled this has no effect. + */ + fs_builder + exec_all(bool b = true) const + { + fs_builder bld = *this; + if (b) + bld.force_writemask_all = true; + return bld; + } + + /** + * Construct a builder with the given debug annotation info. + */ + fs_builder + annotate(const char *str, const void *ir = NULL) const + { + fs_builder bld = *this; + bld.annotation.str = str; + bld.annotation.ir = ir; + return bld; + } + + /** + * Get the SIMD width in use. + */ + unsigned + dispatch_width() const + { + return _dispatch_width; + } + + /** + * Allocate a virtual register of natural vector size (one for this IR) + * and SIMD width. \p n gives the amount of space to allocate in + * dispatch_width units (which is just enough space for one logical + * component in this IR). + */ + dst_reg + vgrf(enum brw_reg_type type, unsigned n = 1) const + { + return dst_reg(GRF, shader->alloc.allocate( + DIV_ROUND_UP(n * type_sz(type) * dispatch_width(), + REG_SIZE)), + type, dispatch_width()); + } + + /** + * Create a null register of floating type. + */ + dst_reg + null_reg_f() const + { + return dst_reg(retype(brw_null_vec(dispatch_width()), + BRW_REGISTER_TYPE_F)); + } + + /** + * Create a null register of signed integer type. + */ + dst_reg + null_reg_d() const + { + return dst_reg(retype(brw_null_vec(dispatch_width()), + BRW_REGISTER_TYPE_D)); + } + + /** + * Create a null register of unsigned integer type. + */ + dst_reg + null_reg_ud() const + { + return dst_reg(retype(brw_null_vec(dispatch_width()), + BRW_REGISTER_TYPE_UD)); + } + + /** + * Get the mask of SIMD channels enabled by dispatch and not yet + * disabled by discard. + */ + src_reg + sample_mask_reg() const + { + const bool uses_kill = + (shader->stage == MESA_SHADER_FRAGMENT && + ((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill); + return (shader->stage != MESA_SHADER_FRAGMENT ? src_reg(0xffff) : + uses_kill ? 
brw_flag_reg(0, 1) : + retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)); + } + + /** + * Insert an instruction into the program. + */ + instruction * + emit(const instruction &inst) const + { + return emit(new(shader->mem_ctx) instruction(inst)); + } + + /** + * Create and insert a nullary control instruction into the program. + */ + instruction * + emit(enum opcode opcode) const + { + return emit(instruction(opcode, dispatch_width())); + } + + /** + * Create and insert a nullary instruction into the program. + */ + instruction * + emit(enum opcode opcode, const dst_reg &dst) const + { + return emit(instruction(opcode, dst)); + } + + /** + * Create and insert a unary instruction into the program. + */ + instruction * + emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const + { + switch (opcode) { + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: + return fix_math_instruction( + emit(instruction(opcode, dst.width, dst, + fix_math_operand(src0)))); + + default: + return emit(instruction(opcode, dst.width, dst, src0)); + } + } + + /** + * Create and insert a binary instruction into the program. + */ + instruction * + emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0, + const src_reg &src1) const + { + switch (opcode) { + case SHADER_OPCODE_POW: + case SHADER_OPCODE_INT_QUOTIENT: + case SHADER_OPCODE_INT_REMAINDER: + return fix_math_instruction( + emit(instruction(opcode, dst.width, dst, + fix_math_operand(src0), + fix_math_operand(src1)))); + + default: + return emit(instruction(opcode, dst.width, dst, src0, src1)); + + } + } + + /** + * Create and insert a ternary instruction into the program. + */ + instruction * + emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0, + const src_reg &src1, const src_reg &src2) const + { + switch (opcode) { + case BRW_OPCODE_BFE: + case BRW_OPCODE_BFI2: + case BRW_OPCODE_MAD: + case BRW_OPCODE_LRP: + return emit(instruction(opcode, dst.width, dst, + fix_3src_operand(src0), + fix_3src_operand(src1), + fix_3src_operand(src2))); + + default: + return emit(instruction(opcode, dst.width, dst, src0, src1, src2)); + } + } + + /** + * Insert a preallocated instruction into the program. + */ + instruction * + emit(instruction *inst) const + { + assert(inst->exec_size == dispatch_width() || + force_writemask_all); + assert(_group == 0 || _group == 8); + + inst->force_sechalf = (_group == 8); + inst->force_writemask_all = force_writemask_all; + inst->annotation = annotation.str; + inst->ir = annotation.ir; + + if (block) + static_cast<instruction *>(cursor)->insert_before(block, inst); + else + cursor->insert_before(inst); + + return inst; + } + + /** + * Select \p src0 if the comparison of both sources with the given + * conditional mod evaluates to true, otherwise select \p src1. + * + * Generally useful to get the minimum or maximum of two values. + */ + void + emit_minmax(const dst_reg &dst, const src_reg &src0, + const src_reg &src1, brw_conditional_mod mod) const + { + if (shader->devinfo->gen >= 6) { + set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), + fix_unsigned_negate(src1))); + } else { + CMP(null_reg_d(), src0, src1, mod); + set_predicate(BRW_PREDICATE_NORMAL, + SEL(dst, src0, src1)); + } + } + + /** + * Copy any live channel from \p src to the first channel of \p dst. 
+ */ + void + emit_uniformize(const dst_reg &dst, const src_reg &src) const + { + const fs_builder ubld = exec_all(); + const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD); + + ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, component(chan_index, 0)); + ubld.emit(SHADER_OPCODE_BROADCAST, component(dst, 0), + src, component(chan_index, 0)); + } + + /** + * Assorted arithmetic ops. + * @{ + */ +#define ALU1(op) \ + instruction * \ + op(const dst_reg &dst, const src_reg &src0) const \ + { \ + return emit(BRW_OPCODE_##op, dst, src0); \ + } + +#define ALU2(op) \ + instruction * \ + op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \ + { \ + return emit(BRW_OPCODE_##op, dst, src0, src1); \ + } + +#define ALU2_ACC(op) \ + instruction * \ + op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \ + { \ + instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1); \ + inst->writes_accumulator = true; \ + return inst; \ + } + +#define ALU3(op) \ + instruction * \ + op(const dst_reg &dst, const src_reg &src0, const src_reg &src1, \ + const src_reg &src2) const \ + { \ + return emit(BRW_OPCODE_##op, dst, src0, src1, src2); \ + } + + ALU2(ADD) + ALU2_ACC(ADDC) + ALU2(AND) + ALU2(ASR) + ALU2(AVG) + ALU3(BFE) + ALU2(BFI1) + ALU3(BFI2) + ALU1(BFREV) + ALU1(CBIT) + ALU2(CMPN) + ALU3(CSEL) + ALU2(DP2) + ALU2(DP3) + ALU2(DP4) + ALU2(DPH) + ALU1(F16TO32) + ALU1(F32TO16) + ALU1(FBH) + ALU1(FBL) + ALU1(FRC) + ALU2(LINE) + ALU1(LZD) + ALU2(MAC) + ALU2_ACC(MACH) + ALU3(MAD) + ALU1(MOV) + ALU2(MUL) + ALU1(NOT) + ALU2(OR) + ALU2(PLN) + ALU1(RNDD) + ALU1(RNDE) + ALU1(RNDU) + ALU1(RNDZ) + ALU2(SAD2) + ALU2_ACC(SADA2) + ALU2(SEL) + ALU2(SHL) + ALU2(SHR) + ALU2_ACC(SUBB) + ALU2(XOR) + +#undef ALU3 +#undef ALU2_ACC +#undef ALU2 +#undef ALU1 + /** @} */ + + /** + * CMP: Sets the low bit of the destination channels with the result + * of the comparison, while the upper bits are undefined, and updates + * the flag register with the packed 16 bits of the result. + */ + instruction * + CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1, + brw_conditional_mod condition) const + { + /* Take the instruction: + * + * CMP null<d> src0<f> src1<f> + * + * Original gen4 does type conversion to the destination type + * before comparison, producing garbage results for floating + * point comparisons. + * + * The destination type doesn't matter on newer generations, + * so we set the type to match src0 so we can compact the + * instruction. + */ + return set_condmod(condition, + emit(BRW_OPCODE_CMP, retype(dst, src0.type), + fix_unsigned_negate(src0), + fix_unsigned_negate(src1))); + } + + /** + * Gen4 predicated IF. + */ + instruction * + IF(brw_predicate predicate) const + { + return set_predicate(predicate, emit(BRW_OPCODE_IF)); + } + + /** + * Emit a linear interpolation instruction. + */ + instruction * + LRP(const dst_reg &dst, const src_reg &x, const src_reg &y, + const src_reg &a) const + { + if (shader->devinfo->gen >= 6) { + /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so + * we need to reorder the operands. + */ + return emit(BRW_OPCODE_LRP, dst, a, y, x); + + } else { + /* We can't use the LRP instruction. Emit x*(1-a) + y*a. 
*/ + const dst_reg y_times_a = vgrf(dst.type); + const dst_reg one_minus_a = vgrf(dst.type); + const dst_reg x_times_one_minus_a = vgrf(dst.type); + + MUL(y_times_a, y, a); + ADD(one_minus_a, negate(a), src_reg(1.0f)); + MUL(x_times_one_minus_a, x, src_reg(one_minus_a)); + return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)); + } + } + + /** + * Collect a number of registers in a contiguous range of registers. + */ + instruction * + LOAD_PAYLOAD(const dst_reg &dst, const src_reg *src, + unsigned sources, unsigned header_size) const + { + assert(dst.width % 8 == 0); + instruction *inst = emit(instruction(SHADER_OPCODE_LOAD_PAYLOAD, + dst.width, dst, src, sources)); + inst->header_size = header_size; + + for (unsigned i = 0; i < header_size; i++) + assert(src[i].file != GRF || + src[i].width * type_sz(src[i].type) == 32); + inst->regs_written = header_size; + + for (unsigned i = header_size; i < sources; ++i) + assert(src[i].file != GRF || + src[i].width == dst.width); + inst->regs_written += (sources - header_size) * (dst.width / 8); + + return inst; + } + + backend_shader *shader; + + private: + /** + * Workaround for negation of UD registers. See comment in + * fs_generator::generate_code() for more details. + */ + src_reg + fix_unsigned_negate(const src_reg &src) const + { + if (src.type == BRW_REGISTER_TYPE_UD && + src.negate) { + dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD); + MOV(temp, src); + return src_reg(temp); + } else { + return src; + } + } + + /** + * Workaround for source register modes not supported by the ternary + * instruction encoding. + */ + src_reg + fix_3src_operand(const src_reg &src) const + { + if (src.file == GRF || src.file == UNIFORM || src.stride > 1) { + return src; + } else { + dst_reg expanded = vgrf(src.type); + MOV(expanded, src); + return expanded; + } + } + + /** + * Workaround for source register modes not supported by the math + * instruction. + */ + src_reg + fix_math_operand(const src_reg &src) const + { + /* Can't do hstride == 0 args on gen6 math, so expand it out. We + * might be able to do better by doing execsize = 1 math and then + * expanding that result out, but we would need to be careful with + * masking. + * + * Gen6 hardware ignores source modifiers (negate and abs) on math + * instructions, so we also move to a temp to set those up. + * + * Gen7 relaxes most of the above restrictions, but still can't use IMM + * operands to math + */ + if ((shader->devinfo->gen == 6 && + (src.file == IMM || src.file == UNIFORM || + src.abs || src.negate)) || + (shader->devinfo->gen == 7 && src.file == IMM)) { + const dst_reg tmp = vgrf(src.type); + MOV(tmp, src); + return tmp; + } else { + return src; + } + } + + /** + * Workaround other weirdness of the math instruction. + */ + instruction * + fix_math_instruction(instruction *inst) const + { + if (shader->devinfo->gen < 6) { + inst->base_mrf = 2; + inst->mlen = inst->sources * dispatch_width() / 8; + + if (inst->sources > 1) { + /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13 + * "Message Payload": + * + * "Operand0[7]. For the INT DIV functions, this operand is the + * denominator." + * ... + * "Operand1[7]. For the INT DIV functions, this operand is the + * numerator." + */ + const bool is_int_div = inst->opcode != SHADER_OPCODE_POW; + const fs_reg src0 = is_int_div ? inst->src[1] : inst->src[0]; + const fs_reg src1 = is_int_div ? 
inst->src[0] : inst->src[1]; + + inst->resize_sources(1); + inst->src[0] = src0; + + at(block, inst).MOV(fs_reg(MRF, inst->base_mrf + 1, src1.type, + dispatch_width()), src1); + } + } + + return inst; + } + + bblock_t *block; + exec_node *cursor; + + unsigned _dispatch_width; + unsigned _group; + bool force_writemask_all; + + /** Debug annotation info. */ + struct { + const char *str; + const void *ir; + } annotation; + }; +} + +#endif diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp index aa62031df73..0af5a915c9f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp @@ -38,6 +38,8 @@ #include "brw_fs_live_variables.h" #include "brw_cfg.h" +using namespace brw; + /* Returns whether an instruction could co-issue if its immediate source were * replaced with a GRF source. */ @@ -270,15 +272,14 @@ fs_visitor::opt_combine_constants() reg.stride = 0; for (int i = 0; i < table.len; i++) { struct imm *imm = &table.imm[i]; - - fs_inst *mov = MOV(reg, fs_reg(imm->val)); - mov->force_writemask_all = true; - if (imm->inst) { - imm->inst->insert_before(imm->block, mov); - } else { - backend_instruction *inst = imm->block->last_non_control_flow_inst(); - inst->insert_after(imm->block, mov); - } + /* Insert it either before the instruction that generated the immediate + * or after the last non-control flow instruction of the common ancestor. + */ + exec_node *n = (imm->inst ? imm->inst : + imm->block->last_non_control_flow_inst()->next); + const fs_builder ibld = bld.at(imm->block, n).exec_all(); + + ibld.MOV(reg, fs_reg(imm->val)); imm->reg = reg.reg; imm->subreg_offset = reg.subreg_offset; diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 52bfa921dc3..c92aae4b1d6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -541,8 +541,16 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) /* Fit this constant in by commuting the operands. * Exception: we can't do this for 32-bit integer MUL/MACH * because it's asymmetric. + * + * The BSpec says for Broadwell that + * + * "When multiplying DW x DW, the dst cannot be accumulator." + * + * Integer MUL with a non-accumulator destination will be lowered + * by lower_integer_multiplication(), so don't restrict it. */ - if ((inst->opcode == BRW_OPCODE_MUL || + if (((inst->opcode == BRW_OPCODE_MUL && + inst->dst.is_accumulator()) || inst->opcode == BRW_OPCODE_MACH) && (inst->src[1].type == BRW_REGISTER_TYPE_D || inst->src[1].type == BRW_REGISTER_TYPE_UD)) diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index db01f8cf7ab..70f0217b93d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -32,6 +32,8 @@ * 13.1 (p378). */ +using namespace brw; + namespace { struct aeb_entry : public exec_node { /** The instruction that generates the expression value. 
*/ @@ -152,28 +154,34 @@ static bool instructions_match(fs_inst *a, fs_inst *b, bool *negate) { return a->opcode == b->opcode && + a->force_writemask_all == b->force_writemask_all && + a->exec_size == b->exec_size && + a->force_sechalf == b->force_sechalf && a->saturate == b->saturate && a->predicate == b->predicate && a->predicate_inverse == b->predicate_inverse && a->conditional_mod == b->conditional_mod && + a->flag_subreg == b->flag_subreg && a->dst.type == b->dst.type && + a->offset == b->offset && + a->mlen == b->mlen && + a->regs_written == b->regs_written && + a->base_mrf == b->base_mrf && + a->eot == b->eot && + a->header_size == b->header_size && + a->shadow_compare == b->shadow_compare && + a->pi_noperspective == b->pi_noperspective && a->sources == b->sources && - (a->is_tex() ? (a->offset == b->offset && - a->mlen == b->mlen && - a->regs_written == b->regs_written && - a->base_mrf == b->base_mrf && - a->eot == b->eot && - a->header_size == b->header_size && - a->shadow_compare == b->shadow_compare) - : true) && operands_match(a, b, negate); } -static fs_inst * -create_copy_instr(fs_visitor *v, fs_inst *inst, fs_reg src, bool negate) +static void +create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) { int written = inst->regs_written; int dst_width = inst->dst.width / 8; + const fs_builder ubld = bld.group(inst->exec_size, inst->force_sechalf) + .exec_all(inst->force_writemask_all); fs_inst *copy; if (written > dst_width) { @@ -189,7 +197,7 @@ create_copy_instr(fs_visitor *v, fs_inst *inst, fs_reg src, bool negate) } assert(src.file == GRF); - payload = ralloc_array(v->mem_ctx, fs_reg, sources); + payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources); for (int i = 0; i < header_size; i++) { payload[i] = src; payload[i].width = 8; @@ -199,15 +207,12 @@ create_copy_instr(fs_visitor *v, fs_inst *inst, fs_reg src, bool negate) payload[i] = src; src = offset(src, 1); } - copy = v->LOAD_PAYLOAD(inst->dst, payload, sources, header_size); + copy = ubld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size); } else { - copy = v->MOV(inst->dst, src); - copy->force_writemask_all = inst->force_writemask_all; + copy = ubld.MOV(inst->dst, src); copy->src[0].negate = negate; } assert(copy->regs_written == written); - - return copy; } bool @@ -261,9 +266,8 @@ fs_visitor::opt_cse_local(bblock_t *block) entry->generator->dst.type, entry->generator->dst.width); - fs_inst *copy = create_copy_instr(this, entry->generator, - entry->tmp, false); - entry->generator->insert_after(block, copy); + create_copy_instr(bld.at(block, entry->generator->next), + entry->generator, entry->tmp, false); entry->generator->dst = entry->tmp; } @@ -274,9 +278,7 @@ fs_visitor::opt_cse_local(bblock_t *block) assert(inst->dst.width == entry->generator->dst.width); assert(inst->dst.type == entry->tmp.type); - fs_inst *copy = create_copy_instr(this, inst, - entry->tmp, negate); - inst->insert_before(block, copy); + create_copy_instr(bld.at(block, inst), inst, entry->tmp, negate); } /* Set our iterator so that next time through the loop inst->next diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp deleted file mode 100644 index 6518ff60c3b..00000000000 --- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp +++ /dev/null @@ -1,742 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the 
Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** @file brw_fs_fp.cpp - * - * Implementation of the compiler for GL_ARB_fragment_program shaders on top - * of the GLSL compiler backend. - */ - -#include "brw_context.h" -#include "brw_fs.h" - -void -fs_visitor::emit_fp_alu1(enum opcode opcode, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src) -{ - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) - emit(opcode, offset(dst, i), offset(src, i)); - } -} - -void -fs_visitor::emit_fp_alu2(enum opcode opcode, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src0, fs_reg src1) -{ - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) - emit(opcode, offset(dst, i), - offset(src0, i), offset(src1, i)); - } -} - -void -fs_visitor::emit_fp_minmax(const prog_instruction *fpi, - fs_reg dst, fs_reg src0, fs_reg src1) -{ - enum brw_conditional_mod conditionalmod; - if (fpi->Opcode == OPCODE_MIN) - conditionalmod = BRW_CONDITIONAL_L; - else - conditionalmod = BRW_CONDITIONAL_GE; - - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) { - emit_minmax(conditionalmod, offset(dst, i), - offset(src0, i), offset(src1, i)); - } - } -} - -void -fs_visitor::emit_fp_sop(enum brw_conditional_mod conditional_mod, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src0, fs_reg src1, - fs_reg one) -{ - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) { - fs_inst *inst; - - emit(CMP(reg_null_d, offset(src0, i), offset(src1, i), - conditional_mod)); - - inst = emit(BRW_OPCODE_SEL, offset(dst, i), one, fs_reg(0.0f)); - inst->predicate = BRW_PREDICATE_NORMAL; - } - } -} - -void -fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi, - fs_reg dst, fs_reg src) -{ - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) - emit(MOV(offset(dst, i), src)); - } -} - -void -fs_visitor::emit_fp_scalar_math(enum opcode opcode, - const struct prog_instruction *fpi, - fs_reg dst, fs_reg src) -{ - fs_reg temp = vgrf(glsl_type::float_type); - emit_math(opcode, temp, src); - emit_fp_scalar_write(fpi, dst, temp); -} - -void -fs_visitor::emit_fragment_program_code() -{ - setup_fp_regs(); - - /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just - * be: - * - * sel.f0 dst 1.0 0.0 - * - * instead of - * - * mov dst 0.0 - * mov.f0 dst 1.0 - */ - fs_reg one = vgrf(glsl_type::float_type); - emit(MOV(one, fs_reg(1.0f))); - - for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) { - const struct prog_instruction *fpi = &prog->Instructions[insn]; - base_ir = fpi; - - fs_reg dst; - 
fs_reg src[3]; - - /* We always emit into a temporary destination register to avoid - * aliasing issues. - */ - dst = vgrf(glsl_type::vec4_type); - - for (int i = 0; i < 3; i++) - src[i] = get_fp_src_reg(&fpi->SrcReg[i]); - - switch (fpi->Opcode) { - case OPCODE_ABS: - src[0].abs = true; - src[0].negate = false; - emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]); - break; - - case OPCODE_ADD: - emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]); - break; - - case OPCODE_CMP: - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) { - fs_inst *inst; - - emit(CMP(reg_null_f, offset(src[0], i), fs_reg(0.0f), - BRW_CONDITIONAL_L)); - - inst = emit(BRW_OPCODE_SEL, offset(dst, i), - offset(src[1], i), offset(src[2], i)); - inst->predicate = BRW_PREDICATE_NORMAL; - } - } - break; - - case OPCODE_COS: - emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]); - break; - - case OPCODE_DP2: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_DPH: { - fs_reg mul = vgrf(glsl_type::float_type); - fs_reg acc = vgrf(glsl_type::float_type); - int count; - - switch (fpi->Opcode) { - case OPCODE_DP2: count = 2; break; - case OPCODE_DP3: count = 3; break; - case OPCODE_DP4: count = 4; break; - case OPCODE_DPH: count = 3; break; - default: unreachable("not reached"); - } - - emit(MUL(acc, offset(src[0], 0), offset(src[1], 0))); - for (int i = 1; i < count; i++) { - emit(MUL(mul, offset(src[0], i), offset(src[1], i))); - emit(ADD(acc, acc, mul)); - } - - if (fpi->Opcode == OPCODE_DPH) - emit(ADD(acc, acc, offset(src[1], 3))); - - emit_fp_scalar_write(fpi, dst, acc); - break; - } - - case OPCODE_DST: - if (fpi->DstReg.WriteMask & WRITEMASK_X) - emit(MOV(dst, fs_reg(1.0f))); - if (fpi->DstReg.WriteMask & WRITEMASK_Y) { - emit(MUL(offset(dst, 1), - offset(src[0], 1), offset(src[1], 1))); - } - if (fpi->DstReg.WriteMask & WRITEMASK_Z) - emit(MOV(offset(dst, 2), offset(src[0], 2))); - if (fpi->DstReg.WriteMask & WRITEMASK_W) - emit(MOV(offset(dst, 3), offset(src[1], 3))); - break; - - case OPCODE_EX2: - emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]); - break; - - case OPCODE_FLR: - emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]); - break; - - case OPCODE_FRC: - emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]); - break; - - case OPCODE_KIL: { - for (int i = 0; i < 4; i++) { - /* In most cases the argument to a KIL will be something like - * TEMP[0].wwww, so there's no point in checking whether .w is < 0 - * 4 times in a row. - */ - if (i > 0 && - GET_SWZ(fpi->SrcReg[0].Swizzle, i) == - GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) && - ((fpi->SrcReg[0].Negate >> i) & 1) == - ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) { - continue; - } - - - /* Emit an instruction that's predicated on the current - * undiscarded pixels, and updates just those pixels to be - * turned off. - */ - fs_inst *cmp = emit(CMP(reg_null_f, offset(src[0], i), - fs_reg(0.0f), BRW_CONDITIONAL_GE)); - cmp->predicate = BRW_PREDICATE_NORMAL; - cmp->flag_subreg = 1; - - if (devinfo->gen >= 6) - emit_discard_jump(); - } - break; - } - - case OPCODE_LG2: - emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]); - break; - - case OPCODE_LIT: - /* From the ARB_fragment_program spec: - * - * tmp = VectorLoad(op0); - * if (tmp.x < 0) tmp.x = 0; - * if (tmp.y < 0) tmp.y = 0; - * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); - * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; - * result.x = 1.0; - * result.y = tmp.x; - * result.z = (tmp.x > 0) ? 
RoughApproxPower(tmp.y, tmp.w) : 0.0; - * result.w = 1.0; - * - * Note that we don't do the clamping to +/- 128. We didn't in - * brw_wm_emit.c either. - */ - if (fpi->DstReg.WriteMask & WRITEMASK_X) - emit(MOV(offset(dst, 0), fs_reg(1.0f))); - - if (fpi->DstReg.WriteMask & WRITEMASK_YZ) { - fs_inst *inst; - emit(CMP(reg_null_f, offset(src[0], 0), fs_reg(0.0f), - BRW_CONDITIONAL_LE)); - - if (fpi->DstReg.WriteMask & WRITEMASK_Y) { - emit(MOV(offset(dst, 1), offset(src[0], 0))); - inst = emit(MOV(offset(dst, 1), fs_reg(0.0f))); - inst->predicate = BRW_PREDICATE_NORMAL; - } - - if (fpi->DstReg.WriteMask & WRITEMASK_Z) { - emit_math(SHADER_OPCODE_POW, offset(dst, 2), - offset(src[0], 1), offset(src[0], 3)); - - inst = emit(MOV(offset(dst, 2), fs_reg(0.0f))); - inst->predicate = BRW_PREDICATE_NORMAL; - } - } - - if (fpi->DstReg.WriteMask & WRITEMASK_W) - emit(MOV(offset(dst, 3), fs_reg(1.0f))); - - break; - - case OPCODE_LRP: - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) { - fs_reg a = offset(src[0], i); - fs_reg y = offset(src[1], i); - fs_reg x = offset(src[2], i); - emit_lrp(offset(dst, i), x, y, a); - } - } - break; - - case OPCODE_MAD: - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) { - if (devinfo->gen >= 6) { - emit(MAD(offset(dst, i), offset(src[2], i), - offset(src[1], i), offset(src[0], i))); - } else { - fs_reg temp = vgrf(glsl_type::float_type); - emit(MUL(temp, offset(src[0], i), offset(src[1], i))); - emit(ADD(offset(dst, i), temp, offset(src[2], i))); - } - } - } - break; - - case OPCODE_MAX: - emit_fp_minmax(fpi, dst, src[0], src[1]); - break; - - case OPCODE_MOV: - emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]); - break; - - case OPCODE_MIN: - emit_fp_minmax(fpi, dst, src[0], src[1]); - break; - - case OPCODE_MUL: - emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]); - break; - - case OPCODE_POW: { - fs_reg temp = vgrf(glsl_type::float_type); - emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]); - emit_fp_scalar_write(fpi, dst, temp); - break; - } - - case OPCODE_RCP: - emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]); - break; - - case OPCODE_RSQ: - emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]); - break; - - case OPCODE_SCS: - if (fpi->DstReg.WriteMask & WRITEMASK_X) { - emit_math(SHADER_OPCODE_COS, offset(dst, 0), - offset(src[0], 0)); - } - - if (fpi->DstReg.WriteMask & WRITEMASK_Y) { - emit_math(SHADER_OPCODE_SIN, offset(dst, 1), - offset(src[0], 1)); - } - break; - - case OPCODE_SGE: - emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one); - break; - - case OPCODE_SIN: - emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]); - break; - - case OPCODE_SLT: - emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one); - break; - - case OPCODE_SUB: { - fs_reg neg_src1 = src[1]; - neg_src1.negate = !src[1].negate; - - emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1); - break; - } - - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: { - ir_texture_opcode op; - fs_reg lod; - fs_reg dpdy; - fs_reg coordinate = src[0]; - fs_reg shadow_c; - fs_reg sample_index; - fs_reg texel_offset; /* No offsets; leave as BAD_FILE. 
*/ - - switch (fpi->Opcode) { - case OPCODE_TEX: - op = ir_tex; - break; - case OPCODE_TXP: { - op = ir_tex; - - coordinate = vgrf(glsl_type::vec3_type); - fs_reg invproj = vgrf(glsl_type::float_type); - emit_math(SHADER_OPCODE_RCP, invproj, offset(src[0], 3)); - for (int i = 0; i < 3; i++) { - emit(MUL(offset(coordinate, i), - offset(src[0], i), invproj)); - } - break; - } - case OPCODE_TXB: - op = ir_txb; - lod = offset(src[0], 3); - break; - default: - unreachable("not reached"); - } - - int coord_components; - switch (fpi->TexSrcTarget) { - case TEXTURE_1D_INDEX: - coord_components = 1; - break; - - case TEXTURE_2D_INDEX: - case TEXTURE_1D_ARRAY_INDEX: - case TEXTURE_RECT_INDEX: - case TEXTURE_EXTERNAL_INDEX: - coord_components = 2; - break; - - case TEXTURE_3D_INDEX: - case TEXTURE_2D_ARRAY_INDEX: - coord_components = 3; - break; - - case TEXTURE_CUBE_INDEX: { - coord_components = 3; - - fs_reg temp = vgrf(glsl_type::float_type); - fs_reg cubecoord = vgrf(glsl_type::vec3_type); - fs_reg abscoord = coordinate; - abscoord.negate = false; - abscoord.abs = true; - emit_minmax(BRW_CONDITIONAL_GE, temp, - offset(abscoord, 0), offset(abscoord, 1)); - emit_minmax(BRW_CONDITIONAL_GE, temp, - temp, offset(abscoord, 2)); - emit_math(SHADER_OPCODE_RCP, temp, temp); - for (int i = 0; i < 3; i++) { - emit(MUL(offset(cubecoord, i), - offset(coordinate, i), temp)); - } - - coordinate = cubecoord; - break; - } - - default: - unreachable("not reached"); - } - - if (fpi->TexShadow) - shadow_c = offset(coordinate, 2); - - emit_texture(op, glsl_type::vec4_type, coordinate, coord_components, - shadow_c, lod, dpdy, 0, sample_index, - reg_undef, /* offset */ - reg_undef, /* mcs */ - 0, /* gather component */ - false, /* is cube array */ - fpi->TexSrcTarget == TEXTURE_RECT_INDEX, - fpi->TexSrcUnit, fs_reg(fpi->TexSrcUnit), - fpi->TexSrcUnit); - dst = this->result; - - break; - } - - case OPCODE_SWZ: - /* Note that SWZ's extended swizzles are handled in the general - * get_src_reg() code. - */ - emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]); - break; - - case OPCODE_XPD: - for (int i = 0; i < 3; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) { - int i1 = (i + 1) % 3; - int i2 = (i + 2) % 3; - - fs_reg temp = vgrf(glsl_type::float_type); - fs_reg neg_src1_1 = offset(src[1], i1); - neg_src1_1.negate = !neg_src1_1.negate; - emit(MUL(temp, offset(src[0], i2), neg_src1_1)); - emit(MUL(offset(dst, i), - offset(src[0], i1), offset(src[1], i2))); - emit(ADD(offset(dst, i), offset(dst, i), temp)); - } - } - break; - - case OPCODE_END: - break; - - default: - _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n", - _mesa_opcode_string(fpi->Opcode)); - } - - /* To handle saturates, we emit a MOV with a saturate bit, which - * optimization should fold into the preceding instructions when safe. - */ - if (_mesa_num_inst_dst_regs(fpi->Opcode) != 0) { - fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg); - - for (int i = 0; i < 4; i++) { - if (fpi->DstReg.WriteMask & (1 << i)) { - fs_inst *inst = emit(MOV(offset(real_dst, i), - offset(dst, i))); - inst->saturate = fpi->SaturateMode; - } - } - } - } - - /* Epilogue: - * - * Fragment depth has this strange convention of being the .z component of - * a vec4. emit_fb_write() wants to see a float value, instead. 
- */ - this->current_annotation = "result.depth write"; - if (frag_depth.file != BAD_FILE) { - fs_reg temp = vgrf(glsl_type::float_type); - emit(MOV(temp, offset(frag_depth, 2))); - frag_depth = temp; - } -} - -void -fs_visitor::setup_fp_regs() -{ - /* PROGRAM_TEMPORARY */ - int num_temp = prog->NumTemporaries; - fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp); - for (int i = 0; i < num_temp; i++) - fp_temp_regs[i] = vgrf(glsl_type::vec4_type); - - /* PROGRAM_STATE_VAR etc. */ - if (dispatch_width == 8) { - for (unsigned p = 0; - p < prog->Parameters->NumParameters; p++) { - for (unsigned int i = 0; i < 4; i++) { - stage_prog_data->param[uniforms++] = - &prog->Parameters->ParameterValues[p][i]; - } - } - } - - fp_input_regs = rzalloc_array(mem_ctx, fs_reg, VARYING_SLOT_MAX); - for (int i = 0; i < VARYING_SLOT_MAX; i++) { - if (prog->InputsRead & BITFIELD64_BIT(i)) { - this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d", - i); - - switch (i) { - case VARYING_SLOT_POS: - { - assert(stage == MESA_SHADER_FRAGMENT); - gl_fragment_program *fp = (gl_fragment_program*) prog; - fp_input_regs[i] = - *emit_fragcoord_interpolation(fp->PixelCenterInteger, - fp->OriginUpperLeft); - } - break; - case VARYING_SLOT_FACE: - fp_input_regs[i] = *emit_frontfacing_interpolation(); - break; - default: - fp_input_regs[i] = vgrf(glsl_type::vec4_type); - emit_general_interpolation(fp_input_regs[i], "fp_input", - glsl_type::vec4_type, - INTERP_QUALIFIER_NONE, - i, false, false); - - if (i == VARYING_SLOT_FOGC) { - emit(MOV(offset(fp_input_regs[i], 1), fs_reg(0.0f))); - emit(MOV(offset(fp_input_regs[i], 2), fs_reg(0.0f))); - emit(MOV(offset(fp_input_regs[i], 3), fs_reg(1.0f))); - } - - break; - } - - this->current_annotation = NULL; - } - } -} - -fs_reg -fs_visitor::get_fp_dst_reg(const prog_dst_register *dst) -{ - assert(stage == MESA_SHADER_FRAGMENT); - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - - switch (dst->File) { - case PROGRAM_TEMPORARY: - return fp_temp_regs[dst->Index]; - - case PROGRAM_OUTPUT: - if (dst->Index == FRAG_RESULT_DEPTH) { - if (frag_depth.file == BAD_FILE) - frag_depth = vgrf(glsl_type::vec4_type); - return frag_depth; - } else if (dst->Index == FRAG_RESULT_COLOR) { - if (outputs[0].file == BAD_FILE) { - outputs[0] = vgrf(glsl_type::vec4_type); - output_components[0] = 4; - - /* Tell emit_fb_writes() to smear fragment.color across all the - * color attachments. 
- */ - for (int i = 1; i < key->nr_color_regions; i++) { - outputs[i] = outputs[0]; - output_components[i] = output_components[0]; - } - } - return outputs[0]; - } else { - int output_index = dst->Index - FRAG_RESULT_DATA0; - if (outputs[output_index].file == BAD_FILE) { - outputs[output_index] = vgrf(glsl_type::vec4_type); - } - output_components[output_index] = 4; - return outputs[output_index]; - } - - case PROGRAM_UNDEFINED: - return fs_reg(); - - default: - _mesa_problem(ctx, "bad dst register file: %s\n", - _mesa_register_file_name((gl_register_file)dst->File)); - return vgrf(glsl_type::vec4_type); - } -} - -fs_reg -fs_visitor::get_fp_src_reg(const prog_src_register *src) -{ - struct gl_program_parameter_list *plist = prog->Parameters; - - fs_reg result; - - assert(!src->Abs); - - switch (src->File) { - case PROGRAM_UNDEFINED: - return fs_reg(); - case PROGRAM_TEMPORARY: - result = fp_temp_regs[src->Index]; - break; - - case PROGRAM_INPUT: - result = fp_input_regs[src->Index]; - break; - - case PROGRAM_STATE_VAR: - case PROGRAM_UNIFORM: - case PROGRAM_CONSTANT: - /* We actually want to look at the type in the Parameters list for this, - * because this lets us upload constant builtin uniforms, as actual - * constants. - */ - switch (plist->Parameters[src->Index].Type) { - case PROGRAM_CONSTANT: { - result = vgrf(glsl_type::vec4_type); - - for (int i = 0; i < 4; i++) { - emit(MOV(offset(result, i), - fs_reg(plist->ParameterValues[src->Index][i].f))); - } - break; - } - - case PROGRAM_STATE_VAR: - case PROGRAM_UNIFORM: - result = fs_reg(UNIFORM, src->Index * 4); - break; - - default: - _mesa_problem(ctx, "bad uniform src register file: %s\n", - _mesa_register_file_name((gl_register_file)src->File)); - return vgrf(glsl_type::vec4_type); - } - break; - - default: - _mesa_problem(ctx, "bad src register file: %s\n", - _mesa_register_file_name((gl_register_file)src->File)); - return vgrf(glsl_type::vec4_type); - } - - if (src->Swizzle != SWIZZLE_NOOP || src->Negate) { - fs_reg unswizzled = result; - result = vgrf(glsl_type::vec4_type); - for (int i = 0; i < 4; i++) { - bool negate = src->Negate & (1 << i); - /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ, - * but it costs us nothing to support it. - */ - int src_swiz = GET_SWZ(src->Swizzle, i); - if (src_swiz == SWIZZLE_ZERO) { - emit(MOV(offset(result, i), fs_reg(0.0f))); - } else if (src_swiz == SWIZZLE_ONE) { - emit(MOV(offset(result, i), - negate ? 
fs_reg(-1.0f) : fs_reg(1.0f))); - } else { - fs_reg src = offset(unswizzled, src_swiz); - if (negate) - src.negate = !src.negate; - emit(MOV(offset(result, i), src)); - } - } - } - - return result; -} diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index a99b7f75b26..2ed0bac6fd9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -121,7 +121,7 @@ brw_reg_from_fs_reg(fs_reg *reg) return brw_reg; } -fs_generator::fs_generator(struct brw_context *brw, +fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const void *key, struct brw_stage_prog_data *prog_data, @@ -130,7 +130,8 @@ fs_generator::fs_generator(struct brw_context *brw, bool runtime_check_aads_emit, const char *stage_abbrev) - : brw(brw), devinfo(brw->intelScreen->devinfo), key(key), + : compiler(compiler), log_data(log_data), + devinfo(compiler->devinfo), key(key), prog_data(prog_data), prog(prog), promoted_constants(promoted_constants), runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false), @@ -401,6 +402,13 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload) } void +fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src) +{ + brw_barrier(p, src); + brw_WAIT(p); +} + +void fs_generator::generate_blorp_fb_write(fs_inst *inst) { brw_fb_WRITE(p, @@ -779,27 +787,19 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src brw_mark_surface_used(prog_data, sampler + base_binding_table_index); } else { /* Non-const sampler index */ - /* Note: this clobbers `dst` as a temporary before emitting the send */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); - struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* Some care required: `sampler` and `temp` may alias: - * addr = sampler & 0xff - * temp = (sampler << 8) & 0xf00 - * addr = addr | temp - */ - brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index)); - brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u)); - brw_AND(p, temp, temp, brw_imm_ud(0x0f00)); - brw_AND(p, addr, addr, brw_imm_ud(0x0ff)); - brw_OR(p, addr, addr, temp); + /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ + brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + if (base_binding_table_index) + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); + brw_AND(p, addr, addr, brw_imm_ud(0xfff)); brw_pop_insn_state(p); @@ -941,6 +941,7 @@ fs_generator::generate_ddy(enum opcode opcode, brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_16); if (unroll_to_simd8) { + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); if (negate_value) { brw_ADD(p, firsthalf(dst), firsthalf(src1), negate(firsthalf(src0))); @@ -1600,10 +1601,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case 16: case 32: - if (type_sz(inst->dst.type) < sizeof(float)) - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - else + /* If the instruction writes to more than one register, it needs to + * be a "compressed" instruction on Gen <= 5. 
+ */ + if (inst->exec_size * inst->dst.stride * type_sz(inst->dst.type) > 32) brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED); + else + brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); break; default: unreachable("Invalid instruction width"); @@ -2121,6 +2125,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_cs_terminate(inst, src[0]); break; + case SHADER_OPCODE_BARRIER: + generate_barrier(inst, src[0]); + break; + default: unreachable("Unsupported opcode"); @@ -2166,15 +2174,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) ralloc_free(annotation.ann); } - static GLuint msg_id = 0; - _mesa_gl_debug(&brw->ctx, &msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_OTHER, - MESA_DEBUG_SEVERITY_NOTIFICATION, - "%s SIMD%d shader: %d inst, %d loops, %d:%d spills:fills, " - "Promoted %u constants, compacted %d to %d bytes.\n", - stage_abbrev, dispatch_width, before_size / 16, loop_count, - spill_count, fill_count, promoted_constants, before_size, after_size); + compiler->shader_debug_log(log_data, + "%s SIMD%d shader: %d inst, %d loops, " + "%d:%d spills:fills, Promoted %u constants, " + "compacted %d to %d bytes.\n", + stage_abbrev, dispatch_width, before_size / 16, + loop_count, spill_count, fill_count, + promoted_constants, before_size, after_size); return start_offset; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 270131a73d1..a378019af5b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -28,6 +28,8 @@ #include "brw_fs.h" #include "brw_nir.h" +using namespace brw; + void fs_visitor::emit_nir_code() { @@ -38,12 +40,12 @@ fs_visitor::emit_nir_code() */ if (nir->num_inputs > 0) { - nir_inputs = vgrf(nir->num_inputs); + nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs); nir_setup_inputs(nir); } if (nir->num_outputs > 0) { - nir_outputs = vgrf(nir->num_outputs); + nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs); nir_setup_outputs(nir); } @@ -58,7 +60,7 @@ fs_visitor::emit_nir_code() unsigned array_elems = reg->num_array_elems == 0 ? 1 : reg->num_array_elems; unsigned size = array_elems * reg->num_components; - nir_globals[reg->index] = vgrf(size); + nir_globals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size); } /* get the main function and emit it */ @@ -93,8 +95,8 @@ fs_visitor::nir_setup_inputs(nir_shader *shader) unsigned array_length = var->type->is_array() ? var->type->length : 1; for (unsigned i = 0; i < array_length; i++) { for (unsigned j = 0; j < components; j++) { - emit(MOV(retype(offset(input, components * i + j), type), - offset(fs_reg(ATTR, var->data.location + i, type), j))); + bld.MOV(retype(offset(input, components * i + j), type), + offset(fs_reg(ATTR, var->data.location + i, type), j)); } } break; @@ -107,7 +109,7 @@ fs_visitor::nir_setup_inputs(nir_shader *shader) if (var->data.location == VARYING_SLOT_POS) { reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer, var->data.origin_upper_left); - emit_percomp(MOV(input, reg), 0xF); + emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, input, reg), 0xF); } else { emit_general_interpolation(input, var->name, var->type, (glsl_interp_qualifier) var->data.interpolation, @@ -218,9 +220,12 @@ fs_visitor::nir_setup_uniform(nir_variable *var) * our name. 
*/ unsigned index = var->data.driver_location; - for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) { + for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) { struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; + if (storage->builtin) + continue; + if (strncmp(var->name, storage->name, namelen) != 0 || (storage->name[namelen] != 0 && storage->name[namelen] != '.' && @@ -358,7 +363,7 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl) unsigned array_elems = reg->num_array_elems == 0 ? 1 : reg->num_array_elems; unsigned size = array_elems * reg->num_components; - nir_locals[reg->index] = vgrf(size); + nir_locals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size); } nir_emit_cf_list(&impl->body); @@ -392,21 +397,21 @@ void fs_visitor::nir_emit_if(nir_if *if_stmt) { /* first, put the condition into f0 */ - fs_inst *inst = emit(MOV(reg_null_d, + fs_inst *inst = bld.MOV(bld.null_reg_d(), retype(get_nir_src(if_stmt->condition), - BRW_REGISTER_TYPE_D))); + BRW_REGISTER_TYPE_D)); inst->conditional_mod = BRW_CONDITIONAL_NZ; - emit(IF(BRW_PREDICATE_NORMAL)); + bld.IF(BRW_PREDICATE_NORMAL); nir_emit_cf_list(&if_stmt->then_list); /* note: if the else is empty, dead CF elimination will remove it */ - emit(BRW_OPCODE_ELSE); + bld.emit(BRW_OPCODE_ELSE); nir_emit_cf_list(&if_stmt->else_list); - emit(BRW_OPCODE_ENDIF); + bld.emit(BRW_OPCODE_ENDIF); if (!try_replace_with_sel() && devinfo->gen < 6) { no16("Can't support (non-uniform) control flow on SIMD16\n"); @@ -420,11 +425,11 @@ fs_visitor::nir_emit_loop(nir_loop *loop) no16("Can't support (non-uniform) control flow on SIMD16\n"); } - emit(BRW_OPCODE_DO); + bld.emit(BRW_OPCODE_DO); nir_emit_cf_list(&loop->body); - emit(BRW_OPCODE_WHILE); + bld.emit(BRW_OPCODE_WHILE); } void @@ -438,19 +443,19 @@ fs_visitor::nir_emit_block(nir_block *block) void fs_visitor::nir_emit_instr(nir_instr *instr) { - this->base_ir = instr; + const fs_builder abld = bld.annotate(NULL, instr); switch (instr->type) { case nir_instr_type_alu: - nir_emit_alu(nir_instr_as_alu(instr)); + nir_emit_alu(abld, nir_instr_as_alu(instr)); break; case nir_instr_type_intrinsic: - nir_emit_intrinsic(nir_instr_as_intrinsic(instr)); + nir_emit_intrinsic(abld, nir_instr_as_intrinsic(instr)); break; case nir_instr_type_tex: - nir_emit_texture(nir_instr_as_tex(instr)); + nir_emit_texture(abld, nir_instr_as_tex(instr)); break; case nir_instr_type_load_const: @@ -460,14 +465,12 @@ fs_visitor::nir_emit_instr(nir_instr *instr) break; case nir_instr_type_jump: - nir_emit_jump(nir_instr_as_jump(instr)); + nir_emit_jump(abld, nir_instr_as_jump(instr)); break; default: unreachable("unknown instruction type"); } - - this->base_ir = NULL; } static brw_reg_type @@ -540,7 +543,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, tmp.subreg_offset = 2; tmp.stride = 2; - fs_inst *or_inst = emit(OR(tmp, g0, fs_reg(0x3f80))); + fs_inst *or_inst = bld.OR(tmp, g0, fs_reg(0x3f80)); or_inst->src[1].type = BRW_REGISTER_TYPE_UW; tmp.type = BRW_REGISTER_TYPE_D; @@ -565,15 +568,15 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, g1_6.negate = true; } - emit(OR(tmp, g1_6, fs_reg(0x3f800000))); + bld.OR(tmp, g1_6, fs_reg(0x3f800000)); } - emit(AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf800000))); + bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf800000)); return true; } void -fs_visitor::nir_emit_alu(nir_alu_instr *instr) +fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) { struct brw_wm_prog_key *fs_key = 
(struct brw_wm_prog_key *) this->key; fs_inst *inst; @@ -605,7 +608,7 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) if (!instr->src[i].src.is_ssa && instr->dest.dest.reg.reg == instr->src[i].src.reg.reg) { need_extra_copy = true; - temp = retype(vgrf(4), result.type); + temp = bld.vgrf(result.type, 4); break; } } @@ -615,11 +618,11 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) continue; if (instr->op == nir_op_imov || instr->op == nir_op_fmov) { - inst = emit(MOV(offset(temp, i), - offset(op[0], instr->src[0].swizzle[i]))); + inst = bld.MOV(offset(temp, i), + offset(op[0], instr->src[0].swizzle[i])); } else { - inst = emit(MOV(offset(temp, i), - offset(op[i], instr->src[i].swizzle[0]))); + inst = bld.MOV(offset(temp, i), + offset(op[i], instr->src[i].swizzle[0])); } inst->saturate = instr->dest.saturate; } @@ -633,7 +636,7 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) if (!(instr->dest.write_mask & (1 << i))) continue; - emit(MOV(offset(result, i), offset(temp, i))); + bld.MOV(offset(result, i), offset(temp, i)); } } return; @@ -665,13 +668,13 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) switch (instr->op) { case nir_op_i2f: case nir_op_u2f: - inst = emit(MOV(result, op[0])); + inst = bld.MOV(result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_f2i: case nir_op_f2u: - emit(MOV(result, op[0])); + bld.MOV(result, op[0]); break; case nir_op_fsign: { @@ -680,17 +683,17 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not * zero. */ - emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); + bld.CMP(bld.null_reg_f(), op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ); fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD); op[0].type = BRW_REGISTER_TYPE_UD; result.type = BRW_REGISTER_TYPE_UD; - emit(AND(result_int, op[0], fs_reg(0x80000000u))); + bld.AND(result_int, op[0], fs_reg(0x80000000u)); - inst = emit(OR(result_int, result_int, fs_reg(0x3f800000u))); + inst = bld.OR(result_int, result_int, fs_reg(0x3f800000u)); inst->predicate = BRW_PREDICATE_NORMAL; if (instr->dest.saturate) { - inst = emit(MOV(result, result)); + inst = bld.MOV(result, result); inst->saturate = true; } break; @@ -701,120 +704,88 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) * -> non-negative val generates 0x00000000. * Predicated OR sets 1 if val is positive. 
*/ - emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G)); - emit(ASR(result, op[0], fs_reg(31))); - inst = emit(OR(result, result, fs_reg(1))); + bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_G); + bld.ASR(result, op[0], fs_reg(31)); + inst = bld.OR(result, result, fs_reg(1)); inst->predicate = BRW_PREDICATE_NORMAL; break; case nir_op_frcp: - inst = emit_math(SHADER_OPCODE_RCP, result, op[0]); + inst = bld.emit(SHADER_OPCODE_RCP, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fexp2: - inst = emit_math(SHADER_OPCODE_EXP2, result, op[0]); + inst = bld.emit(SHADER_OPCODE_EXP2, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_flog2: - inst = emit_math(SHADER_OPCODE_LOG2, result, op[0]); + inst = bld.emit(SHADER_OPCODE_LOG2, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fsin: - inst = emit_math(SHADER_OPCODE_SIN, result, op[0]); + inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fcos: - inst = emit_math(SHADER_OPCODE_COS, result, op[0]); + inst = bld.emit(SHADER_OPCODE_COS, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fddx: if (fs_key->high_quality_derivatives) { - inst = emit(FS_OPCODE_DDX_FINE, result, op[0]); + inst = bld.emit(FS_OPCODE_DDX_FINE, result, op[0]); } else { - inst = emit(FS_OPCODE_DDX_COARSE, result, op[0]); + inst = bld.emit(FS_OPCODE_DDX_COARSE, result, op[0]); } inst->saturate = instr->dest.saturate; break; case nir_op_fddx_fine: - inst = emit(FS_OPCODE_DDX_FINE, result, op[0]); + inst = bld.emit(FS_OPCODE_DDX_FINE, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fddx_coarse: - inst = emit(FS_OPCODE_DDX_COARSE, result, op[0]); + inst = bld.emit(FS_OPCODE_DDX_COARSE, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fddy: if (fs_key->high_quality_derivatives) { - inst = emit(FS_OPCODE_DDY_FINE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0], + fs_reg(fs_key->render_to_fbo)); } else { - inst = emit(FS_OPCODE_DDY_COARSE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0], + fs_reg(fs_key->render_to_fbo)); } inst->saturate = instr->dest.saturate; break; case nir_op_fddy_fine: - inst = emit(FS_OPCODE_DDY_FINE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0], + fs_reg(fs_key->render_to_fbo)); inst->saturate = instr->dest.saturate; break; case nir_op_fddy_coarse: - inst = emit(FS_OPCODE_DDY_COARSE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0], + fs_reg(fs_key->render_to_fbo)); inst->saturate = instr->dest.saturate; break; case nir_op_fadd: case nir_op_iadd: - inst = emit(ADD(result, op[0], op[1])); + inst = bld.ADD(result, op[0], op[1]); inst->saturate = instr->dest.saturate; break; case nir_op_fmul: - inst = emit(MUL(result, op[0], op[1])); + inst = bld.MUL(result, op[0], op[1]); inst->saturate = instr->dest.saturate; break; - case nir_op_imul: { - if (devinfo->gen >= 8) { - emit(MUL(result, op[0], op[1])); - break; - } else { - nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src); - nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src); - - if (value0 && value0->u[0] < (1 << 16)) { - if (devinfo->gen < 7) { - emit(MUL(result, op[0], op[1])); - } else { - emit(MUL(result, op[1], op[0])); - } - break; - } 
else if (value1 && value1->u[0] < (1 << 16)) { - if (devinfo->gen < 7) { - emit(MUL(result, op[1], op[0])); - } else { - emit(MUL(result, op[0], op[1])); - } - break; - } - } - - if (devinfo->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type); - - emit(MUL(acc, op[0], op[1])); - emit(MACH(reg_null_d, op[0], op[1])); - emit(MOV(result, fs_reg(acc))); + case nir_op_imul: + bld.MUL(result, op[0], op[1]); break; - } case nir_op_imul_high: case nir_op_umul_high: { @@ -823,8 +794,8 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type); - fs_inst *mul = emit(MUL(acc, op[0], op[1])); - emit(MACH(result, op[0], op[1])); + fs_inst *mul = bld.MUL(acc, op[0], op[1]); + bld.MACH(result, op[0], op[1]); /* Until Gen8, integer multiplies read 32-bits from one source, and * 16-bits from the other, and relying on the MACH instruction to @@ -852,7 +823,7 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_idiv: case nir_op_udiv: - emit_math(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]); + bld.emit(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]); break; case nir_op_uadd_carry: { @@ -862,8 +833,8 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) struct brw_reg acc = retype(brw_acc_reg(dispatch_width), BRW_REGISTER_TYPE_UD); - emit(ADDC(reg_null_ud, op[0], op[1])); - emit(MOV(result, fs_reg(acc))); + bld.ADDC(bld.null_reg_ud(), op[0], op[1]); + bld.MOV(result, fs_reg(acc)); break; } @@ -874,63 +845,63 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) struct brw_reg acc = retype(brw_acc_reg(dispatch_width), BRW_REGISTER_TYPE_UD); - emit(SUBB(reg_null_ud, op[0], op[1])); - emit(MOV(result, fs_reg(acc))); + bld.SUBB(bld.null_reg_ud(), op[0], op[1]); + bld.MOV(result, fs_reg(acc)); break; } case nir_op_umod: - emit_math(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]); + bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]); break; case nir_op_flt: case nir_op_ilt: case nir_op_ult: - emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_L)); + bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_L); break; case nir_op_fge: case nir_op_ige: case nir_op_uge: - emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_GE)); + bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_GE); break; case nir_op_feq: case nir_op_ieq: - emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_Z)); + bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_Z); break; case nir_op_fne: case nir_op_ine: - emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ)); + bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ); break; case nir_op_inot: if (devinfo->gen >= 8) { resolve_source_modifiers(&op[0]); } - emit(NOT(result, op[0])); + bld.NOT(result, op[0]); break; case nir_op_ixor: if (devinfo->gen >= 8) { resolve_source_modifiers(&op[0]); resolve_source_modifiers(&op[1]); } - emit(XOR(result, op[0], op[1])); + bld.XOR(result, op[0], op[1]); break; case nir_op_ior: if (devinfo->gen >= 8) { resolve_source_modifiers(&op[0]); resolve_source_modifiers(&op[1]); } - emit(OR(result, op[0], op[1])); + bld.OR(result, op[0], op[1]); break; case nir_op_iand: if (devinfo->gen >= 8) { resolve_source_modifiers(&op[0]); resolve_source_modifiers(&op[1]); } - emit(AND(result, op[0], op[1])); + bld.AND(result, op[0], op[1]); break; case nir_op_fdot2: @@ -978,53 +949,53 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) unreachable("not reached: should be handled by ldexp_to_arith()"); case nir_op_fsqrt: - inst = 
emit_math(SHADER_OPCODE_SQRT, result, op[0]); + inst = bld.emit(SHADER_OPCODE_SQRT, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_frsq: - inst = emit_math(SHADER_OPCODE_RSQ, result, op[0]); + inst = bld.emit(SHADER_OPCODE_RSQ, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_b2i: - emit(AND(result, op[0], fs_reg(1))); + bld.AND(result, op[0], fs_reg(1)); break; case nir_op_b2f: - emit(AND(retype(result, BRW_REGISTER_TYPE_UD), op[0], fs_reg(0x3f800000u))); + bld.AND(retype(result, BRW_REGISTER_TYPE_UD), op[0], fs_reg(0x3f800000u)); break; case nir_op_f2b: - emit(CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); + bld.CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ); break; case nir_op_i2b: - emit(CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); + bld.CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ); break; case nir_op_ftrunc: - inst = emit(RNDZ(result, op[0])); + inst = bld.RNDZ(result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fceil: { op[0].negate = !op[0].negate; fs_reg temp = vgrf(glsl_type::float_type); - emit(RNDD(temp, op[0])); + bld.RNDD(temp, op[0]); temp.negate = true; - inst = emit(MOV(result, temp)); + inst = bld.MOV(result, temp); inst->saturate = instr->dest.saturate; break; } case nir_op_ffloor: - inst = emit(RNDD(result, op[0])); + inst = bld.RNDD(result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_ffract: - inst = emit(FRC(result, op[0])); + inst = bld.FRC(result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fround_even: - inst = emit(RNDE(result, op[0])); + inst = bld.RNDE(result, op[0]); inst->saturate = instr->dest.saturate; break; @@ -1032,11 +1003,11 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_imin: case nir_op_umin: if (devinfo->gen >= 6) { - inst = emit(BRW_OPCODE_SEL, result, op[0], op[1]); + inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]); inst->conditional_mod = BRW_CONDITIONAL_L; } else { - emit(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_L)); - inst = emit(SEL(result, op[0], op[1])); + bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_L); + inst = bld.SEL(result, op[0], op[1]); inst->predicate = BRW_PREDICATE_NORMAL; } inst->saturate = instr->dest.saturate; @@ -1046,11 +1017,11 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_imax: case nir_op_umax: if (devinfo->gen >= 6) { - inst = emit(BRW_OPCODE_SEL, result, op[0], op[1]); + inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]); inst->conditional_mod = BRW_CONDITIONAL_GE; } else { - emit(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_GE)); - inst = emit(SEL(result, op[0], op[1])); + bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_GE); + inst = bld.SEL(result, op[0], op[1]); inst->predicate = BRW_PREDICATE_NORMAL; } inst->saturate = instr->dest.saturate; @@ -1069,57 +1040,57 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) unreachable("not reached: should be handled by lower_packing_builtins"); case nir_op_unpack_half_2x16_split_x: - inst = emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0]); + inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_unpack_half_2x16_split_y: - inst = emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0]); + inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fpow: - inst = emit_math(SHADER_OPCODE_POW, result, op[0], op[1]); + inst = 
bld.emit(SHADER_OPCODE_POW, result, op[0], op[1]); inst->saturate = instr->dest.saturate; break; case nir_op_bitfield_reverse: - emit(BFREV(result, op[0])); + bld.BFREV(result, op[0]); break; case nir_op_bit_count: - emit(CBIT(result, op[0])); + bld.CBIT(result, op[0]); break; case nir_op_ufind_msb: case nir_op_ifind_msb: { - emit(FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0])); + bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]); /* FBH counts from the MSB side, while GLSL's findMSB() wants the count * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then * subtract the result from 31 to convert the MSB count into an LSB count. */ - emit(CMP(reg_null_d, result, fs_reg(-1), BRW_CONDITIONAL_NZ)); + bld.CMP(bld.null_reg_d(), result, fs_reg(-1), BRW_CONDITIONAL_NZ); fs_reg neg_result(result); neg_result.negate = true; - inst = emit(ADD(result, neg_result, fs_reg(31))); + inst = bld.ADD(result, neg_result, fs_reg(31)); inst->predicate = BRW_PREDICATE_NORMAL; break; } case nir_op_find_lsb: - emit(FBL(result, op[0])); + bld.FBL(result, op[0]); break; case nir_op_ubitfield_extract: case nir_op_ibitfield_extract: - emit(BFE(result, op[2], op[1], op[0])); + bld.BFE(result, op[2], op[1], op[0]); break; case nir_op_bfm: - emit(BFI1(result, op[0], op[1])); + bld.BFI1(result, op[0], op[1]); break; case nir_op_bfi: - emit(BFI2(result, op[0], op[1], op[2])); + bld.BFI2(result, op[0], op[1], op[2]); break; case nir_op_bitfield_insert: @@ -1127,26 +1098,26 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) "lower_instructions::bitfield_insert_to_bfm_bfi"); case nir_op_ishl: - emit(SHL(result, op[0], op[1])); + bld.SHL(result, op[0], op[1]); break; case nir_op_ishr: - emit(ASR(result, op[0], op[1])); + bld.ASR(result, op[0], op[1]); break; case nir_op_ushr: - emit(SHR(result, op[0], op[1])); + bld.SHR(result, op[0], op[1]); break; case nir_op_pack_half_2x16_split: - emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]); + bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]); break; case nir_op_ffma: - inst = emit(MAD(result, op[2], op[1], op[0])); + inst = bld.MAD(result, op[2], op[1], op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_flrp: - inst = emit_lrp(result, op[0], op[1], op[2]); + inst = bld.LRP(result, op[0], op[1], op[2]); inst->saturate = instr->dest.saturate; break; @@ -1154,8 +1125,8 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) if (optimize_frontfacing_ternary(instr, result)) return; - emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); - inst = emit(SEL(result, op[1], op[2])); + bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_NZ); + inst = bld.SEL(result, op[1], op[2]); inst->predicate = BRW_PREDICATE_NORMAL; break; @@ -1169,9 +1140,9 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) if (devinfo->gen <= 5 && (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == BRW_NIR_BOOLEAN_NEEDS_RESOLVE) { fs_reg masked = vgrf(glsl_type::int_type); - emit(AND(masked, result, fs_reg(1))); + bld.AND(masked, result, fs_reg(1)); masked.negate = true; - emit(MOV(retype(result, BRW_REGISTER_TYPE_D), masked)); + bld.MOV(retype(result, BRW_REGISTER_TYPE_D), masked); } } @@ -1190,8 +1161,8 @@ fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg, int multiplier = nir_reg->num_components * (v->dispatch_width / 8); reg.reladdr = new(v->mem_ctx) fs_reg(v->vgrf(glsl_type::int_type)); - v->emit(v->MUL(*reg.reladdr, v->get_nir_src(*indirect), - fs_reg(multiplier))); + v->bld.MUL(*reg.reladdr, v->get_nir_src(*indirect), + fs_reg(multiplier)); } 
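For readers skimming the indirect-register path just above: the dynamic index is scaled by the GRF footprint of one array element, num_components * (dispatch_width / 8), before being stored in reg.reladdr. A small standalone sketch of that address arithmetic; the function name and the base_offset parameter are illustrative, not Mesa's:

#include <cstdio>

// Effective GRF offset of element `index` in an indirectly addressed
// NIR register array: each element occupies
// num_components * (dispatch_width / 8) registers.
static unsigned indirect_grf_offset(unsigned base_offset, unsigned index,
                                    unsigned num_components,
                                    unsigned dispatch_width)
{
   const unsigned multiplier = num_components * (dispatch_width / 8);
   return base_offset + index * multiplier;
}

int main()
{
   // A vec4 register array in SIMD16: each element spans 4 * 2 = 8 GRFs,
   // so element 3 starts 24 GRFs past the base.
   std::printf("%u\n", indirect_grf_offset(0, 3, 4, 16));
   return 0;
}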
return reg; @@ -1203,11 +1174,10 @@ fs_visitor::get_nir_src(nir_src src) if (src.is_ssa) { assert(src.ssa->parent_instr->type == nir_instr_type_load_const); nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr); - fs_reg reg = vgrf(src.ssa->num_components); - reg.type = BRW_REGISTER_TYPE_D; + fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, src.ssa->num_components); for (unsigned i = 0; i < src.ssa->num_components; ++i) - emit(MOV(offset(reg, i), fs_reg(load->value.i[i]))); + bld.MOV(offset(reg, i), fs_reg(load->value.i[i])); return reg; } else { @@ -1230,24 +1200,25 @@ fs_visitor::get_nir_dest(nir_dest dest) } void -fs_visitor::emit_percomp(fs_inst *inst, unsigned wr_mask) +fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst, + unsigned wr_mask) { for (unsigned i = 0; i < 4; i++) { if (!((wr_mask >> i) & 1)) continue; - fs_inst *new_inst = new(mem_ctx) fs_inst(*inst); + fs_inst *new_inst = new(mem_ctx) fs_inst(inst); new_inst->dst = offset(new_inst->dst, i); for (unsigned j = 0; j < new_inst->sources; j++) - if (inst->src[j].file == GRF) + if (new_inst->src[j].file == GRF) new_inst->src[j] = offset(new_inst->src[j], i); - emit(new_inst); + bld.emit(new_inst); } } void -fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) +fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) { fs_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) @@ -1265,12 +1236,12 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ fs_inst *cmp; if (instr->intrinsic == nir_intrinsic_discard_if) { - cmp = emit(CMP(reg_null_f, get_nir_src(instr->src[0]), - fs_reg(0), BRW_CONDITIONAL_Z)); + cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]), + fs_reg(0), BRW_CONDITIONAL_Z); } else { fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); - cmp = emit(CMP(reg_null_f, some_reg, some_reg, BRW_CONDITIONAL_NZ)); + cmp = bld.CMP(bld.null_reg_f(), some_reg, some_reg, BRW_CONDITIONAL_NZ); } cmp->predicate = BRW_PREDICATE_NORMAL; cmp->flag_subreg = 1; @@ -1307,8 +1278,8 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } case nir_intrinsic_load_front_face: - emit(MOV(retype(dest, BRW_REGISTER_TYPE_D), - *emit_frontfacing_interpolation())); + bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), + *emit_frontfacing_interpolation()); break; case nir_intrinsic_load_vertex_id: @@ -1318,7 +1289,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg vertex_id = nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]; assert(vertex_id.file != BAD_FILE); dest.type = vertex_id.type; - emit(MOV(dest, vertex_id)); + bld.MOV(dest, vertex_id); break; } @@ -1326,7 +1297,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX]; assert(base_vertex.file != BAD_FILE); dest.type = base_vertex.type; - emit(MOV(dest, base_vertex)); + bld.MOV(dest, base_vertex); break; } @@ -1334,7 +1305,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID]; assert(instance_id.file != BAD_FILE); dest.type = instance_id.type; - emit(MOV(dest, instance_id)); + bld.MOV(dest, instance_id); break; } @@ -1342,7 +1313,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg sample_mask_in = nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN]; assert(sample_mask_in.file != BAD_FILE); dest.type = sample_mask_in.type; - emit(MOV(dest, sample_mask_in)); + bld.MOV(dest, sample_mask_in); break; } @@ 
-1350,8 +1321,8 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS]; assert(sample_pos.file != BAD_FILE); dest.type = sample_pos.type; - emit(MOV(dest, sample_pos)); - emit(MOV(offset(dest, 1), offset(sample_pos, 1))); + bld.MOV(dest, sample_pos); + bld.MOV(offset(dest, 1), offset(sample_pos, 1)); break; } @@ -1359,7 +1330,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg sample_id = nir_system_values[SYSTEM_VALUE_SAMPLE_ID]; assert(sample_id.file != BAD_FILE); dest.type = sample_id.type; - emit(MOV(dest, sample_id)); + bld.MOV(dest, sample_id); break; } @@ -1377,16 +1348,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) index -= num_direct_uniforms; } - for (int i = 0; i < instr->const_index[1]; i++) { - for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg src = offset(retype(uniform_reg, dest.type), index); - if (has_indirect) - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); - index++; + for (unsigned j = 0; j < instr->num_components; j++) { + fs_reg src = offset(retype(uniform_reg, dest.type), index); + if (has_indirect) + src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); + index++; - emit(MOV(dest, src)); - dest = offset(dest, 1); - } + bld.MOV(dest, src); + dest = offset(dest, 1); } break; } @@ -1417,9 +1386,9 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) * from any live channel. */ surf_index = vgrf(glsl_type::uint_type); - emit(ADD(surf_index, get_nir_src(instr->src[0]), - fs_reg(stage_prog_data->binding_table.ubo_start))); - emit_uniformize(surf_index, surf_index); + bld.ADD(surf_index, get_nir_src(instr->src[0]), + fs_reg(stage_prog_data->binding_table.ubo_start)); + bld.emit_uniformize(surf_index, surf_index); /* Assume this may touch any UBO. It would be nice to provide * a tighter bound, but the array information is already lowered away. @@ -1432,21 +1401,21 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) if (has_indirect) { /* Turn the byte offset into a dword offset. 
*/ fs_reg base_offset = vgrf(glsl_type::int_type); - emit(SHR(base_offset, retype(get_nir_src(instr->src[1]), - BRW_REGISTER_TYPE_D), - fs_reg(2))); + bld.SHR(base_offset, retype(get_nir_src(instr->src[1]), + BRW_REGISTER_TYPE_D), + fs_reg(2)); unsigned vec4_offset = instr->const_index[0] / 4; for (int i = 0; i < instr->num_components; i++) - emit(VARYING_PULL_CONSTANT_LOAD(offset(dest, i), surf_index, - base_offset, vec4_offset + i)); + VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, i), surf_index, + base_offset, vec4_offset + i); } else { fs_reg packed_consts = vgrf(glsl_type::float_type); packed_consts.type = dest.type; fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15); - emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts, - surf_index, const_offset_reg); + bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts, + surf_index, const_offset_reg); for (unsigned i = 0; i < instr->num_components; i++) { packed_consts.set_smear(instr->const_index[0] % 16 / 4 + i); @@ -1456,7 +1425,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ assert(packed_consts.subreg_offset < 32); - emit(MOV(dest, packed_consts)); + bld.MOV(dest, packed_consts); dest = offset(dest, 1); } } @@ -1468,17 +1437,15 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) /* fallthrough */ case nir_intrinsic_load_input: { unsigned index = 0; - for (int i = 0; i < instr->const_index[1]; i++) { - for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg src = offset(retype(nir_inputs, dest.type), - instr->const_index[0] + index); - if (has_indirect) - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); - index++; - - emit(MOV(dest, src)); - dest = offset(dest, 1); - } + for (unsigned j = 0; j < instr->num_components; j++) { + fs_reg src = offset(retype(nir_inputs, dest.type), + instr->const_index[0] + index); + if (has_indirect) + src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); + index++; + + bld.MOV(dest, src); + dest = offset(dest, 1); } break; } @@ -1510,7 +1477,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ no16("interpolate_at_* not yet supported in SIMD16 mode."); - fs_reg dst_xy = vgrf(2); + fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2); /* For most messages, we need one reg of ignored data; the hardware * requires mlen==1 even when there is no payload. in the per-slot @@ -1522,7 +1489,8 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_interp_var_at_centroid: - inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u)); + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, + dst_xy, src, fs_reg(0u)); break; case nir_intrinsic_interp_var_at_sample: { @@ -1530,8 +1498,8 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]); assert(const_sample); unsigned msg_data = const_sample ? 
const_sample->i[0] << 4 : 0; - inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, - fs_reg(msg_data)); + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, + fs_reg(msg_data)); break; } @@ -1542,17 +1510,17 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf; unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf; - inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src, - fs_reg(off_x | (off_y << 4))); + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src, + fs_reg(off_x | (off_y << 4))); } else { src = vgrf(glsl_type::ivec2_type); fs_reg offset_src = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_F); for (int i = 0; i < 2; i++) { fs_reg temp = vgrf(glsl_type::float_type); - emit(MUL(temp, offset(offset_src, i), fs_reg(16.0f))); + bld.MUL(temp, offset(offset_src, i), fs_reg(16.0f)); fs_reg itemp = vgrf(glsl_type::int_type); - emit(MOV(itemp, temp)); /* float to int */ + bld.MOV(itemp, temp); /* float to int */ /* Clamp the upper end of the range to +7/16. * ARB_gpu_shader5 requires that we support a maximum offset @@ -1569,14 +1537,13 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) * implementation-dependent constant * FRAGMENT_INTERPOLATION_OFFSET_BITS" */ - - emit(BRW_OPCODE_SEL, offset(src, i), itemp, fs_reg(7)) - ->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */ + set_condmod(BRW_CONDITIONAL_L, + bld.SEL(offset(src, i), itemp, fs_reg(7))); } mlen = 2; - inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src, - fs_reg(0u)); + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src, + fs_reg(0u)); } break; } @@ -1594,7 +1561,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg src = interp_reg(instr->variables[0]->var->data.location, j); src.type = dest.type; - emit(FS_OPCODE_LINTERP, dest, dst_xy, src); + bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src); dest = offset(dest, 1); } break; @@ -1606,27 +1573,29 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_store_output: { fs_reg src = get_nir_src(instr->src[0]); unsigned index = 0; - for (int i = 0; i < instr->const_index[1]; i++) { - for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg new_dest = offset(retype(nir_outputs, src.type), - instr->const_index[0] + index); - if (has_indirect) - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1])); - index++; - emit(MOV(new_dest, src)); - src = offset(src, 1); - } + for (unsigned j = 0; j < instr->num_components; j++) { + fs_reg new_dest = offset(retype(nir_outputs, src.type), + instr->const_index[0] + index); + if (has_indirect) + src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1])); + index++; + bld.MOV(new_dest, src); + src = offset(src, 1); } break; } + case nir_intrinsic_barrier: + emit_barrier(); + break; + default: unreachable("unknown intrinsic"); } } void -fs_visitor::nir_emit_texture(nir_tex_instr *instr) +fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) { uint32_t set = instr->sampler_set; uint32_t binding = instr->sampler_index; @@ -1650,7 +1619,8 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr) bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && instr->is_array; - int lod_components = 0, offset_components = 0; + int lod_components = 0; + int UNUSED offset_components = 0; fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, tex_offset; @@ -1719,8 +1689,8 @@ 
fs_visitor::nir_emit_texture(nir_tex_instr *instr) /* Emit code to evaluate the actual indexing expression */ sampler_reg = vgrf(glsl_type::uint_type); - emit(ADD(sampler_reg, src, fs_reg(sampler))); - emit_uniformize(sampler_reg, sampler_reg); + bld.ADD(sampler_reg, src, fs_reg(sampler)); + bld.emit_uniformize(sampler_reg, sampler_reg); break; } @@ -1789,18 +1759,19 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr) fs_reg dest = get_nir_dest(instr->dest); dest.type = this->result.type; unsigned num_components = nir_tex_instr_dest_size(instr); - emit_percomp(MOV(dest, this->result), (1 << num_components) - 1); + emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, dest, this->result), + (1 << num_components) - 1); } void -fs_visitor::nir_emit_jump(nir_jump_instr *instr) +fs_visitor::nir_emit_jump(const fs_builder &bld, nir_jump_instr *instr) { switch (instr->type) { case nir_jump_break: - emit(BRW_OPCODE_BREAK); + bld.emit(BRW_OPCODE_BREAK); break; case nir_jump_continue: - emit(BRW_OPCODE_CONTINUE); + bld.emit(BRW_OPCODE_CONTINUE); break; case nir_jump_return: default: diff --git a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp index cf3da7b1882..d92d4bbd81d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp @@ -85,9 +85,9 @@ fs_visitor::opt_peephole_predicated_break() * instruction to set the flag register. */ if (devinfo->gen == 6 && if_inst->conditional_mod) { - fs_inst *cmp_inst = CMP(reg_null_d, if_inst->src[0], if_inst->src[1], - if_inst->conditional_mod); - if_inst->insert_before(if_block, cmp_inst); + bld.at(if_block, if_inst) + .CMP(bld.null_reg_d(), if_inst->src[0], if_inst->src[1], + if_inst->conditional_mod); jump_inst->predicate = BRW_PREDICATE_NORMAL; } else { jump_inst->predicate = if_inst->predicate; diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 582d0993f1c..364fc4a5ad2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -30,6 +30,8 @@ #include "glsl/glsl_types.h" #include "glsl/ir_optimization.h" +using namespace brw; + static void assign_reg(unsigned *reg_hw_locations, fs_reg *reg) { @@ -468,14 +470,14 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, * see if we can actually use MRFs to do spills without overwriting normal MRF * contents. */ -void -fs_visitor::get_used_mrfs(bool *mrf_used) +static void +get_used_mrfs(fs_visitor *v, bool *mrf_used) { - int reg_width = dispatch_width / 8; + int reg_width = v->dispatch_width / 8; memset(mrf_used, 0, BRW_MAX_MRF * sizeof(bool)); - foreach_block_and_inst(block, fs_inst, inst, cfg) { + foreach_block_and_inst(block, fs_inst, inst, v->cfg) { if (inst->dst.file == MRF) { int reg = inst->dst.reg & ~BRW_MRF_COMPR4; mrf_used[reg] = true; @@ -489,7 +491,7 @@ fs_visitor::get_used_mrfs(bool *mrf_used) } if (inst->mlen > 0) { - for (int i = 0; i < implied_mrf_writes(inst); i++) { + for (int i = 0; i < v->implied_mrf_writes(inst); i++) { mrf_used[inst->base_mrf + i] = true; } } @@ -500,12 +502,14 @@ fs_visitor::get_used_mrfs(bool *mrf_used) * Sets interference between virtual GRFs and usage of the high GRFs for SEND * messages (treated as MRFs in code generation). 
*/ -void -fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node) +static void +setup_mrf_hack_interference(fs_visitor *v, struct ra_graph *g, + int first_mrf_node, int *first_used_mrf) { bool mrf_used[BRW_MAX_MRF]; - get_used_mrfs(mrf_used); + get_used_mrfs(v, mrf_used); + *first_used_mrf = BRW_MAX_MRF; for (int i = 0; i < BRW_MAX_MRF; i++) { /* Mark each MRF reg node as being allocated to its physical register. * @@ -518,7 +522,10 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node) * that are used as conflicting with all virtual GRFs. */ if (mrf_used[i]) { - for (unsigned j = 0; j < this->alloc.count; j++) { + if (i < *first_used_mrf) + *first_used_mrf = i; + + for (unsigned j = 0; j < v->alloc.count; j++) { ra_add_node_interference(g, first_mrf_node + i, j); } } @@ -528,7 +535,6 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node) bool fs_visitor::assign_regs(bool allow_spilling) { - struct brw_compiler *compiler = brw->intelScreen->compiler; /* Most of this allocation was written for a reg_width of 1 * (dispatch_width == 8). In extending to SIMD16, the code was * left in place and it was converted to have the hardware @@ -584,7 +590,9 @@ fs_visitor::assign_regs(bool allow_spilling) setup_payload_interference(g, payload_node_count, first_payload_node); if (devinfo->gen >= 7) { - setup_mrf_hack_interference(g, first_mrf_hack_node); + int first_used_mrf = BRW_MAX_MRF; + setup_mrf_hack_interference(this, g, first_mrf_hack_node, + &first_used_mrf); foreach_block_and_inst(block, fs_inst, inst, cfg) { /* When we do send-from-GRF for FB writes, we need to ensure that @@ -600,6 +608,13 @@ fs_visitor::assign_regs(bool allow_spilling) if (inst->eot) { int size = alloc.sizes[inst->src[0].reg]; int reg = compiler->fs_reg_sets[rsi].class_to_ra_reg_range[size] - 1; + + /* If something happened to spill, we want to push the EOT send + * register early enough in the register file that we don't + * conflict with any used MRF hack registers. + */ + reg -= BRW_MAX_MRF - first_used_mrf; + ra_set_node_reg(g, inst->src[0].reg, reg); break; } @@ -696,25 +711,24 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst, dst.width = 16; } + const fs_builder ibld = bld.annotate(inst->annotation, inst->ir) + .group(reg_size * 8, 0) + .at(block, inst); + for (int i = 0; i < count / reg_size; i++) { /* The gen7 descriptor-based offset is 12 bits of HWORD units. */ bool gen7_read = devinfo->gen >= 7 && spill_offset < (1 << 12) * REG_SIZE; - - fs_inst *unspill_inst = - new(mem_ctx) fs_inst(gen7_read ? - SHADER_OPCODE_GEN7_SCRATCH_READ : - SHADER_OPCODE_GEN4_SCRATCH_READ, - dst); + fs_inst *unspill_inst = ibld.emit(gen7_read ? 
+ SHADER_OPCODE_GEN7_SCRATCH_READ : + SHADER_OPCODE_GEN4_SCRATCH_READ, + dst); unspill_inst->offset = spill_offset; - unspill_inst->ir = inst->ir; - unspill_inst->annotation = inst->annotation; unspill_inst->regs_written = reg_size; if (!gen7_read) { unspill_inst->base_mrf = 14; unspill_inst->mlen = 1; /* header contains offset */ } - inst->insert_before(block, unspill_inst); dst.reg_offset += reg_size; spill_offset += reg_size * REG_SIZE; @@ -732,17 +746,17 @@ fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src, reg_size = 2; } + const fs_builder ibld = bld.annotate(inst->annotation, inst->ir) + .group(reg_size * 8, 0) + .at(block, inst->next); + for (int i = 0; i < count / reg_size; i++) { fs_inst *spill_inst = - new(mem_ctx) fs_inst(SHADER_OPCODE_GEN4_SCRATCH_WRITE, - reg_size * 8, reg_null_f, src); + ibld.emit(SHADER_OPCODE_GEN4_SCRATCH_WRITE, bld.null_reg_f(), src); src.reg_offset += reg_size; spill_inst->offset = spill_offset + i * reg_size * REG_SIZE; - spill_inst->ir = inst->ir; - spill_inst->annotation = inst->annotation; spill_inst->mlen = 1 + reg_size; /* header, value */ spill_inst->base_mrf = spill_base_mrf; - inst->insert_after(block, spill_inst); } } @@ -839,7 +853,7 @@ fs_visitor::spill_reg(int spill_reg) */ if (!spilled_any_registers) { bool mrf_used[BRW_MAX_MRF]; - get_used_mrfs(mrf_used); + get_used_mrfs(this, mrf_used); for (int i = spill_base_mrf; i < BRW_MAX_MRF; i++) { if (mrf_used[i]) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp index 52aa5590c2e..8660ec08b8f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp @@ -37,6 +37,8 @@ */ #define MAX_MOVS 8 /**< The maximum number of MOVs to attempt to match. */ +using namespace brw; + /** * Scans forwards from an IF counting consecutive MOV instructions in the * "then" and "else" blocks of the if statement. 
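A side note on the spill/unspill hunks above: each register-sized chunk advances the scratch byte offset by reg_size * REG_SIZE, and the Gen7 descriptor-based scratch read is only usable while that offset still fits in its 12 bits of HWORD (register-sized) units. A standalone sketch of that selection, assuming the usual 32-byte GRF; the helper name is illustrative, not Mesa's:

#include <cstdio>

static const unsigned REG_SIZE = 32;   // bytes per GRF (assumption stated above)

// Mirrors: gen7_read = devinfo->gen >= 7 && spill_offset < (1 << 12) * REG_SIZE
static bool can_use_gen7_scratch_read(int gen, unsigned spill_offset)
{
   return gen >= 7 && spill_offset < (1u << 12) * REG_SIZE;
}

int main()
{
   const unsigned reg_size = 2;          // e.g. a SIMD16 vgrf spans two GRFs
   unsigned spill_offset = 0;
   for (int i = 0; i < 4; i++) {
      std::printf("chunk %d: byte offset %u, gen7 read ok: %d\n",
                  i, spill_offset, can_use_gen7_scratch_read(7, spill_offset));
      spill_offset += reg_size * REG_SIZE;  // advance one register-sized chunk
   }
   return 0;
}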
@@ -153,9 +155,6 @@ fs_visitor::opt_peephole_sel() if (movs == 0) continue; - fs_inst *sel_inst[MAX_MOVS] = { NULL }; - fs_inst *mov_imm_inst[MAX_MOVS] = { NULL }; - enum brw_predicate predicate; bool predicate_inverse; if (devinfo->gen == 6 && if_inst->conditional_mod) { @@ -188,9 +187,21 @@ fs_visitor::opt_peephole_sel() movs = i; break; } + } + + if (movs == 0) + continue; + + const fs_builder ibld = bld.at(block, if_inst); + /* Emit a CMP if our IF used the embedded comparison */ + if (devinfo->gen == 6 && if_inst->conditional_mod) + ibld.CMP(ibld.null_reg_d(), if_inst->src[0], if_inst->src[1], + if_inst->conditional_mod); + + for (int i = 0; i < movs; i++) { if (then_mov[i]->src[0].equals(else_mov[i]->src[0])) { - sel_inst[i] = MOV(then_mov[i]->dst, then_mov[i]->src[0]); + ibld.MOV(then_mov[i]->dst, then_mov[i]->src[0]); } else { /* Only the last source register can be a constant, so if the MOV * in the "then" clause uses a constant, we need to put it in a @@ -200,29 +211,13 @@ fs_visitor::opt_peephole_sel() if (src0.file == IMM) { src0 = vgrf(glsl_type::float_type); src0.type = then_mov[i]->src[0].type; - mov_imm_inst[i] = MOV(src0, then_mov[i]->src[0]); + ibld.MOV(src0, then_mov[i]->src[0]); } - sel_inst[i] = SEL(then_mov[i]->dst, src0, else_mov[i]->src[0]); - sel_inst[i]->predicate = predicate; - sel_inst[i]->predicate_inverse = predicate_inverse; + set_predicate_inv(predicate, predicate_inverse, + ibld.SEL(then_mov[i]->dst, src0, + else_mov[i]->src[0])); } - } - - if (movs == 0) - continue; - - /* Emit a CMP if our IF used the embedded comparison */ - if (devinfo->gen == 6 && if_inst->conditional_mod) { - fs_inst *cmp_inst = CMP(reg_null_d, if_inst->src[0], if_inst->src[1], - if_inst->conditional_mod); - if_inst->insert_before(block, cmp_inst); - } - - for (int i = 0; i < movs; i++) { - if (mov_imm_inst[i]) - if_inst->insert_before(block, mov_imm_inst[i]); - if_inst->insert_before(block, sel_inst[i]); then_mov[i]->remove(then_block); else_mov[i]->remove(else_block); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index e1f47d4ec44..9a4bad6bcf5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -47,6 +47,7 @@ #include "glsl/ir_optimization.h" #include "program/sampler.h" +using namespace brw; fs_reg * fs_visitor::emit_vs_system_value(int location) @@ -76,1371 +77,6 @@ fs_visitor::emit_vs_system_value(int location) return reg; } -void -fs_visitor::visit(ir_variable *ir) -{ - fs_reg *reg = NULL; - - if (variable_storage(ir)) - return; - - if (ir->data.mode == ir_var_shader_in) { - assert(ir->data.location != -1); - if (stage == MESA_SHADER_VERTEX) { - reg = new(this->mem_ctx) - fs_reg(ATTR, ir->data.location, - brw_type_for_base_type(ir->type->get_scalar_type())); - } else if (ir->data.location == VARYING_SLOT_POS) { - reg = emit_fragcoord_interpolation(ir->data.pixel_center_integer, - ir->data.origin_upper_left); - } else if (ir->data.location == VARYING_SLOT_FACE) { - reg = emit_frontfacing_interpolation(); - } else { - reg = new(this->mem_ctx) fs_reg(vgrf(ir->type)); - emit_general_interpolation(*reg, ir->name, ir->type, - (glsl_interp_qualifier) ir->data.interpolation, - ir->data.location, ir->data.centroid, - ir->data.sample); - } - assert(reg); - hash_table_insert(this->variable_ht, reg, ir); - return; - } else if (ir->data.mode == ir_var_shader_out) { - reg = new(this->mem_ctx) fs_reg(vgrf(ir->type)); - - if (stage == MESA_SHADER_VERTEX) { - int vector_elements 
= - ir->type->is_array() ? ir->type->fields.array->vector_elements - : ir->type->vector_elements; - - for (int i = 0; i < (type_size(ir->type) + 3) / 4; i++) { - int output = ir->data.location + i; - this->outputs[output] = *reg; - this->outputs[output].reg_offset = i * 4; - this->output_components[output] = vector_elements; - } - - } else if (ir->data.index > 0) { - assert(ir->data.location == FRAG_RESULT_DATA0); - assert(ir->data.index == 1); - this->dual_src_output = *reg; - this->do_dual_src = true; - } else if (ir->data.location == FRAG_RESULT_COLOR) { - /* Writing gl_FragColor outputs to all color regions. */ - assert(stage == MESA_SHADER_FRAGMENT); - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) { - this->outputs[i] = *reg; - this->output_components[i] = 4; - } - } else if (ir->data.location == FRAG_RESULT_DEPTH) { - this->frag_depth = *reg; - } else if (ir->data.location == FRAG_RESULT_SAMPLE_MASK) { - this->sample_mask = *reg; - } else { - /* gl_FragData or a user-defined FS output */ - assert(ir->data.location >= FRAG_RESULT_DATA0 && - ir->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS); - - int vector_elements = - ir->type->is_array() ? ir->type->fields.array->vector_elements - : ir->type->vector_elements; - - /* General color output. */ - for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) { - int output = ir->data.location - FRAG_RESULT_DATA0 + i; - this->outputs[output] = offset(*reg, vector_elements * i); - this->output_components[output] = vector_elements; - } - } - } else if (ir->data.mode == ir_var_uniform) { - int param_index = uniforms; - - /* Thanks to the lower_ubo_reference pass, we will see only - * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO - * variables, so no need for them to be in variable_ht. - * - * Some uniforms, such as samplers and atomic counters, have no actual - * storage, so we should ignore them. 
- */ - if (ir->is_in_uniform_block() || type_size(ir->type) == 0) - return; - - if (dispatch_width == 16) { - if (!variable_storage(ir)) { - fail("Failed to find uniform '%s' in SIMD16\n", ir->name); - } - return; - } - - param_size[param_index] = type_size(ir->type); - if (!strncmp(ir->name, "gl_", 3)) { - setup_builtin_uniform_values(ir); - } else { - setup_uniform_values(ir); - } - - reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); - reg->type = brw_type_for_base_type(ir->type); - - } else if (ir->data.mode == ir_var_system_value) { - switch (ir->data.location) { - case SYSTEM_VALUE_BASE_VERTEX: - case SYSTEM_VALUE_VERTEX_ID: - case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: - case SYSTEM_VALUE_INSTANCE_ID: - reg = emit_vs_system_value(ir->data.location); - break; - case SYSTEM_VALUE_SAMPLE_POS: - reg = emit_samplepos_setup(); - break; - case SYSTEM_VALUE_SAMPLE_ID: - reg = emit_sampleid_setup(); - break; - case SYSTEM_VALUE_SAMPLE_MASK_IN: - assert(devinfo->gen >= 7); - reg = new(mem_ctx) - fs_reg(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0), - BRW_REGISTER_TYPE_D)); - break; - } - } - - if (!reg) - reg = new(this->mem_ctx) fs_reg(vgrf(ir->type)); - - hash_table_insert(this->variable_ht, reg, ir); -} - -void -fs_visitor::visit(ir_dereference_variable *ir) -{ - fs_reg *reg = variable_storage(ir->var); - - if (!reg) { - fail("Failed to find variable storage for %s\n", ir->var->name); - this->result = fs_reg(reg_null_d); - return; - } - this->result = *reg; -} - -void -fs_visitor::visit(ir_dereference_record *ir) -{ - const glsl_type *struct_type = ir->record->type; - - ir->record->accept(this); - - unsigned int off = 0; - for (unsigned int i = 0; i < struct_type->length; i++) { - if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) - break; - off += type_size(struct_type->fields.structure[i].type); - } - this->result = offset(this->result, off); - this->result.type = brw_type_for_base_type(ir->type); -} - -void -fs_visitor::visit(ir_dereference_array *ir) -{ - ir_constant *constant_index; - fs_reg src; - int element_size = type_size(ir->type); - - constant_index = ir->array_index->as_constant(); - - ir->array->accept(this); - src = this->result; - src.type = brw_type_for_base_type(ir->type); - - if (constant_index) { - if (src.file == ATTR) { - /* Attribute arrays get loaded as one vec4 per element. In that case - * offset the source register. - */ - src.reg += constant_index->value.i[0]; - } else { - assert(src.file == UNIFORM || src.file == GRF || src.file == HW_REG); - src = offset(src, constant_index->value.i[0] * element_size); - } - } else { - /* Variable index array dereference. We attach the variable index - * component to the reg as a pointer to a register containing the - * offset. Currently only uniform arrays are supported in this patch, - * and that reladdr pointer is resolved by - * move_uniform_array_access_to_pull_constants(). All other array types - * are lowered by lower_variable_index_to_cond_assign(). - */ - ir->array_index->accept(this); - - fs_reg index_reg; - index_reg = vgrf(glsl_type::int_type); - emit(BRW_OPCODE_MUL, index_reg, this->result, fs_reg(element_size)); - - if (src.reladdr) { - emit(BRW_OPCODE_ADD, index_reg, *src.reladdr, index_reg); - } - - src.reladdr = ralloc(mem_ctx, fs_reg); - memcpy(src.reladdr, &index_reg, sizeof(index_reg)); - } - this->result = src; -} - -fs_inst * -fs_visitor::emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y, - const fs_reg &a) -{ - if (devinfo->gen < 6) { - /* We can't use the LRP instruction. 
Emit x*(1-a) + y*a. */ - fs_reg y_times_a = vgrf(glsl_type::float_type); - fs_reg one_minus_a = vgrf(glsl_type::float_type); - fs_reg x_times_one_minus_a = vgrf(glsl_type::float_type); - - emit(MUL(y_times_a, y, a)); - - fs_reg negative_a = a; - negative_a.negate = !a.negate; - emit(ADD(one_minus_a, negative_a, fs_reg(1.0f))); - emit(MUL(x_times_one_minus_a, x, one_minus_a)); - - return emit(ADD(dst, x_times_one_minus_a, y_times_a)); - } else { - /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so - * we need to reorder the operands. - */ - return emit(LRP(dst, a, y, x)); - } -} - -void -fs_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst, - const fs_reg &src0, const fs_reg &src1) -{ - assert(conditionalmod == BRW_CONDITIONAL_GE || - conditionalmod == BRW_CONDITIONAL_L); - - fs_inst *inst; - - if (devinfo->gen >= 6) { - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->conditional_mod = conditionalmod; - } else { - emit(CMP(reg_null_d, src0, src1, conditionalmod)); - - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->predicate = BRW_PREDICATE_NORMAL; - } -} - -void -fs_visitor::emit_uniformize(const fs_reg &dst, const fs_reg &src) -{ - const fs_reg chan_index = vgrf(glsl_type::uint_type); - - emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, component(chan_index, 0)) - ->force_writemask_all = true; - emit(SHADER_OPCODE_BROADCAST, component(dst, 0), - src, component(chan_index, 0)) - ->force_writemask_all = true; -} - -bool -fs_visitor::try_emit_saturate(ir_expression *ir) -{ - if (ir->operation != ir_unop_saturate) - return false; - - ir_rvalue *sat_val = ir->operands[0]; - - fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail(); - - sat_val->accept(this); - fs_reg src = this->result; - - fs_inst *last_inst = (fs_inst *) this->instructions.get_tail(); - - /* If the last instruction from our accept() generated our - * src, just set the saturate flag instead of emmitting a separate mov. - */ - fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src); - if (modify && modify->regs_written == modify->dst.width / 8 && - modify->can_do_saturate()) { - modify->saturate = true; - this->result = src; - return true; - } - - return false; -} - -bool -fs_visitor::try_emit_line(ir_expression *ir) -{ - /* LINE's src0 must be of type float. 
*/ - if (ir->type != glsl_type::float_type) - return false; - - ir_rvalue *nonmul = ir->operands[1]; - ir_expression *mul = ir->operands[0]->as_expression(); - - if (!mul || mul->operation != ir_binop_mul) { - nonmul = ir->operands[0]; - mul = ir->operands[1]->as_expression(); - - if (!mul || mul->operation != ir_binop_mul) - return false; - } - - ir_constant *const_add = nonmul->as_constant(); - if (!const_add) - return false; - - int add_operand_vf = brw_float_to_vf(const_add->value.f[0]); - if (add_operand_vf == -1) - return false; - - ir_rvalue *non_const_mul = mul->operands[1]; - ir_constant *const_mul = mul->operands[0]->as_constant(); - if (!const_mul) { - const_mul = mul->operands[1]->as_constant(); - - if (!const_mul) - return false; - - non_const_mul = mul->operands[0]; - } - - int mul_operand_vf = brw_float_to_vf(const_mul->value.f[0]); - if (mul_operand_vf == -1) - return false; - - non_const_mul->accept(this); - fs_reg src1 = this->result; - - fs_reg src0 = vgrf(ir->type); - emit(BRW_OPCODE_MOV, src0, - fs_reg((uint8_t)mul_operand_vf, 0, 0, (uint8_t)add_operand_vf)); - - this->result = vgrf(ir->type); - emit(BRW_OPCODE_LINE, this->result, src0, src1); - return true; -} - -bool -fs_visitor::try_emit_mad(ir_expression *ir) -{ - /* 3-src instructions were introduced in gen6. */ - if (devinfo->gen < 6) - return false; - - /* MAD can only handle floating-point data. */ - if (ir->type != glsl_type::float_type) - return false; - - ir_rvalue *nonmul; - ir_expression *mul; - bool mul_negate, mul_abs; - - for (int i = 0; i < 2; i++) { - mul_negate = false; - mul_abs = false; - - mul = ir->operands[i]->as_expression(); - nonmul = ir->operands[1 - i]; - - if (mul && mul->operation == ir_unop_abs) { - mul = mul->operands[0]->as_expression(); - mul_abs = true; - } else if (mul && mul->operation == ir_unop_neg) { - mul = mul->operands[0]->as_expression(); - mul_negate = true; - } - - if (mul && mul->operation == ir_binop_mul) - break; - } - - if (!mul || mul->operation != ir_binop_mul) - return false; - - nonmul->accept(this); - fs_reg src0 = this->result; - - mul->operands[0]->accept(this); - fs_reg src1 = this->result; - src1.negate ^= mul_negate; - src1.abs = mul_abs; - if (mul_abs) - src1.negate = false; - - mul->operands[1]->accept(this); - fs_reg src2 = this->result; - src2.abs = mul_abs; - if (mul_abs) - src2.negate = false; - - this->result = vgrf(ir->type); - emit(BRW_OPCODE_MAD, this->result, src0, src1, src2); - - return true; -} - -bool -fs_visitor::try_emit_b2f_of_comparison(ir_expression *ir) -{ - /* On platforms that do not natively generate 0u and ~0u for Boolean - * results, b2f expressions that look like - * - * f = b2f(expr cmp 0) - * - * will generate better code by pretending the expression is - * - * f = ir_triop_csel(0.0, 1.0, expr cmp 0) - * - * This is because the last instruction of "expr" can generate the - * condition code for the "cmp 0". This avoids having to do the "-(b & 1)" - * trick to generate 0u or ~0u for the Boolean result. This means code like - * - * mov(16) g16<1>F 1F - * mul.ge.f0(16) null g6<8,8,1>F g14<8,8,1>F - * (+f0) sel(16) m6<1>F g16<8,8,1>F 0F - * - * will be generated instead of - * - * mul(16) g2<1>F g12<8,8,1>F g4<8,8,1>F - * cmp.ge.f0(16) g2<1>D g4<8,8,1>F 0F - * and(16) g4<1>D g2<8,8,1>D 1D - * and(16) m6<1>D -g4<8,8,1>D 0x3f800000UD - * - * When the comparison is != 0.0 using the knowledge that the false case - * already results in zero would allow better code generation by possibly - * avoiding a load-immediate instruction. 
- */ - ir_expression *cmp = ir->operands[0]->as_expression(); - if (cmp == NULL) - return false; - - if (cmp->operation == ir_binop_nequal) { - for (unsigned i = 0; i < 2; i++) { - ir_constant *c = cmp->operands[i]->as_constant(); - if (c == NULL || !c->is_zero()) - continue; - - ir_expression *expr = cmp->operands[i ^ 1]->as_expression(); - if (expr != NULL) { - fs_reg op[2]; - - for (unsigned j = 0; j < 2; j++) { - cmp->operands[j]->accept(this); - op[j] = this->result; - - resolve_ud_negate(&op[j]); - } - - emit_bool_to_cond_code_of_reg(cmp, op); - - /* In this case we know when the condition is true, op[i ^ 1] - * contains zero. Invert the predicate, use op[i ^ 1] as src0, - * and immediate 1.0f as src1. - */ - this->result = vgrf(ir->type); - op[i ^ 1].type = BRW_REGISTER_TYPE_F; - - fs_inst *inst = emit(SEL(this->result, op[i ^ 1], fs_reg(1.0f))); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->predicate_inverse = true; - return true; - } - } - } - - emit_bool_to_cond_code(cmp); - - fs_reg temp = vgrf(ir->type); - emit(MOV(temp, fs_reg(1.0f))); - - this->result = vgrf(ir->type); - fs_inst *inst = emit(SEL(this->result, temp, fs_reg(0.0f))); - inst->predicate = BRW_PREDICATE_NORMAL; - - return true; -} - -static int -pack_pixel_offset(float x) -{ - /* Clamp upper end of the range to +7/16. See explanation in non-constant - * offset case below. */ - int n = MIN2((int)(x * 16), 7); - return n & 0xf; -} - -void -fs_visitor::emit_interpolate_expression(ir_expression *ir) -{ - /* in SIMD16 mode, the pixel interpolator returns coords interleaved - * 8 channels at a time, same as the barycentric coords presented in - * the FS payload. this requires a bit of extra work to support. - */ - no16("interpolate_at_* not yet supported in SIMD16 mode."); - - assert(stage == MESA_SHADER_FRAGMENT); - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - - ir_dereference * deref = ir->operands[0]->as_dereference(); - ir_swizzle * swiz = NULL; - if (!deref) { - /* the api does not allow a swizzle here, but the varying packing code - * may have pushed one into here. - */ - swiz = ir->operands[0]->as_swizzle(); - assert(swiz); - deref = swiz->val->as_dereference(); - } - assert(deref); - ir_variable * var = deref->variable_referenced(); - assert(var); - - /* 1. collect interpolation factors */ - - fs_reg dst_xy = vgrf(glsl_type::get_instance(ir->type->base_type, 2, 1)); - - /* for most messages, we need one reg of ignored data; the hardware requires mlen==1 - * even when there is no payload. in the per-slot offset case, we'll replace this with - * the proper source data. 
*/ - fs_reg src = vgrf(glsl_type::float_type); - int mlen = 1; /* one reg unless overriden */ - int reg_width = dispatch_width / 8; - fs_inst *inst; - - switch (ir->operation) { - case ir_unop_interpolate_at_centroid: - inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u)); - break; - - case ir_binop_interpolate_at_sample: { - ir_constant *sample_num = ir->operands[1]->as_constant(); - assert(sample_num || !"nonconstant sample number should have been lowered."); - - unsigned msg_data = sample_num->value.i[0] << 4; - inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, fs_reg(msg_data)); - break; - } - - case ir_binop_interpolate_at_offset: { - ir_constant *const_offset = ir->operands[1]->as_constant(); - if (const_offset) { - unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) | - (pack_pixel_offset(const_offset->value.f[1]) << 4); - inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src, - fs_reg(msg_data)); - } else { - /* pack the operands: hw wants offsets as 4 bit signed ints */ - ir->operands[1]->accept(this); - src = vgrf(glsl_type::ivec2_type); - fs_reg src2 = src; - for (int i = 0; i < 2; i++) { - fs_reg temp = vgrf(glsl_type::float_type); - emit(MUL(temp, this->result, fs_reg(16.0f))); - emit(MOV(src2, temp)); /* float to int */ - - /* Clamp the upper end of the range to +7/16. ARB_gpu_shader5 requires - * that we support a maximum offset of +0.5, which isn't representable - * in a S0.4 value -- if we didn't clamp it, we'd end up with -8/16, - * which is the opposite of what the shader author wanted. - * - * This is legal due to ARB_gpu_shader5's quantization rules: - * - * "Not all values of <offset> may be supported; x and y offsets may - * be rounded to fixed-point values with the number of fraction bits - * given by the implementation-dependent constant - * FRAGMENT_INTERPOLATION_OFFSET_BITS" - */ - - fs_inst *inst = emit(BRW_OPCODE_SEL, src2, src2, fs_reg(7)); - inst->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */ - - src2 = offset(src2, 1); - this->result = offset(this->result, 1); - } - - mlen = 2 * reg_width; - inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src, - fs_reg(0u)); - } - break; - } - - default: - unreachable("not reached"); - } - - inst->mlen = mlen; - inst->regs_written = 2 * reg_width; /* 2 floats per slot returned */ - inst->pi_noperspective = var->determine_interpolation_mode(key->flat_shade) == - INTERP_QUALIFIER_NOPERSPECTIVE; - - /* 2. emit linterp */ - - fs_reg res = vgrf(ir->type); - this->result = res; - - for (int i = 0; i < ir->type->vector_elements; i++) { - int ch = swiz ? 
((*(int *)&swiz->mask) >> 2*i) & 3 : i; - emit(FS_OPCODE_LINTERP, res, dst_xy, - fs_reg(interp_reg(var->data.location, ch))); - res = offset(res, 1); - } -} - -void -fs_visitor::visit(ir_expression *ir) -{ - unsigned int operand; - fs_reg op[3], temp; - fs_inst *inst; - struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key; - - assert(ir->get_num_operands() <= 3); - - if (try_emit_saturate(ir)) - return; - - /* Deal with the real oddball stuff first */ - switch (ir->operation) { - case ir_binop_add: - if (devinfo->gen <= 5 && try_emit_line(ir)) - return; - if (try_emit_mad(ir)) - return; - break; - - case ir_triop_csel: - ir->operands[1]->accept(this); - op[1] = this->result; - ir->operands[2]->accept(this); - op[2] = this->result; - - emit_bool_to_cond_code(ir->operands[0]); - - this->result = vgrf(ir->type); - inst = emit(SEL(this->result, op[1], op[2])); - inst->predicate = BRW_PREDICATE_NORMAL; - return; - - case ir_unop_b2f: - if (devinfo->gen <= 5 && try_emit_b2f_of_comparison(ir)) - return; - break; - - case ir_unop_interpolate_at_centroid: - case ir_binop_interpolate_at_offset: - case ir_binop_interpolate_at_sample: - emit_interpolate_expression(ir); - return; - - default: - break; - } - - for (operand = 0; operand < ir->get_num_operands(); operand++) { - ir->operands[operand]->accept(this); - if (this->result.file == BAD_FILE) { - fail("Failed to get tree for expression operand:\n"); - ir->operands[operand]->fprint(stderr); - fprintf(stderr, "\n"); - } - assert(this->result.file == GRF || - this->result.file == UNIFORM || this->result.file == ATTR); - op[operand] = this->result; - - /* Matrix expression operands should have been broken down to vector - * operations already. - */ - assert(!ir->operands[operand]->type->is_matrix()); - /* And then those vector operands should have been broken down to scalar. - */ - assert(!ir->operands[operand]->type->is_vector()); - } - - /* Storage for our result. If our result goes into an assignment, it will - * just get copy-propagated out, so no worries. - */ - this->result = vgrf(ir->type); - - switch (ir->operation) { - case ir_unop_logic_not: - emit(NOT(this->result, op[0])); - break; - case ir_unop_neg: - op[0].negate = !op[0].negate; - emit(MOV(this->result, op[0])); - break; - case ir_unop_abs: - op[0].abs = true; - op[0].negate = false; - emit(MOV(this->result, op[0])); - break; - case ir_unop_sign: - if (ir->type->is_float()) { - /* AND(val, 0x80000000) gives the sign bit. - * - * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not - * zero. - */ - emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); - - op[0].type = BRW_REGISTER_TYPE_UD; - this->result.type = BRW_REGISTER_TYPE_UD; - emit(AND(this->result, op[0], fs_reg(0x80000000u))); - - inst = emit(OR(this->result, this->result, fs_reg(0x3f800000u))); - inst->predicate = BRW_PREDICATE_NORMAL; - - this->result.type = BRW_REGISTER_TYPE_F; - } else { - /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1). - * -> non-negative val generates 0x00000000. - * Predicated OR sets 1 if val is positive. 
- */ - emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G)); - - emit(ASR(this->result, op[0], fs_reg(31))); - - inst = emit(OR(this->result, this->result, fs_reg(1))); - inst->predicate = BRW_PREDICATE_NORMAL; - } - break; - case ir_unop_rcp: - emit_math(SHADER_OPCODE_RCP, this->result, op[0]); - break; - - case ir_unop_exp2: - emit_math(SHADER_OPCODE_EXP2, this->result, op[0]); - break; - case ir_unop_log2: - emit_math(SHADER_OPCODE_LOG2, this->result, op[0]); - break; - case ir_unop_exp: - case ir_unop_log: - unreachable("not reached: should be handled by ir_explog_to_explog2"); - case ir_unop_sin: - emit_math(SHADER_OPCODE_SIN, this->result, op[0]); - break; - case ir_unop_cos: - emit_math(SHADER_OPCODE_COS, this->result, op[0]); - break; - - case ir_unop_dFdx: - /* Select one of the two opcodes based on the glHint value. */ - if (fs_key->high_quality_derivatives) - emit(FS_OPCODE_DDX_FINE, this->result, op[0]); - else - emit(FS_OPCODE_DDX_COARSE, this->result, op[0]); - break; - - case ir_unop_dFdx_coarse: - emit(FS_OPCODE_DDX_COARSE, this->result, op[0]); - break; - - case ir_unop_dFdx_fine: - emit(FS_OPCODE_DDX_FINE, this->result, op[0]); - break; - - case ir_unop_dFdy: - /* Select one of the two opcodes based on the glHint value. */ - if (fs_key->high_quality_derivatives) - emit(FS_OPCODE_DDY_FINE, result, op[0], fs_reg(fs_key->render_to_fbo)); - else - emit(FS_OPCODE_DDY_COARSE, result, op[0], fs_reg(fs_key->render_to_fbo)); - break; - - case ir_unop_dFdy_coarse: - emit(FS_OPCODE_DDY_COARSE, result, op[0], fs_reg(fs_key->render_to_fbo)); - break; - - case ir_unop_dFdy_fine: - emit(FS_OPCODE_DDY_FINE, result, op[0], fs_reg(fs_key->render_to_fbo)); - break; - - case ir_binop_add: - emit(ADD(this->result, op[0], op[1])); - break; - case ir_binop_sub: - unreachable("not reached: should be handled by ir_sub_to_add_neg"); - - case ir_binop_mul: - if (devinfo->gen < 8 && ir->type->is_integer()) { - /* For integer multiplication, the MUL uses the low 16 bits - * of one of the operands (src0 on gen6, src1 on gen7). The - * MACH accumulates in the contribution of the upper 16 bits - * of that operand. - */ - if (ir->operands[0]->is_uint16_constant()) { - if (devinfo->gen < 7) - emit(MUL(this->result, op[0], op[1])); - else - emit(MUL(this->result, op[1], op[0])); - } else if (ir->operands[1]->is_uint16_constant()) { - if (devinfo->gen < 7) - emit(MUL(this->result, op[1], op[0])); - else - emit(MUL(this->result, op[0], op[1])); - } else { - if (devinfo->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), - this->result.type); - - emit(MUL(acc, op[0], op[1])); - emit(MACH(reg_null_d, op[0], op[1])); - emit(MOV(this->result, fs_reg(acc))); - } - } else { - emit(MUL(this->result, op[0], op[1])); - } - break; - case ir_binop_imul_high: { - if (devinfo->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), - this->result.type); - - fs_inst *mul = emit(MUL(acc, op[0], op[1])); - emit(MACH(this->result, op[0], op[1])); - - /* Until Gen8, integer multiplies read 32-bits from one source, and - * 16-bits from the other, and relying on the MACH instruction to - * generate the high bits of the result. - * - * On Gen8, the multiply instruction does a full 32x32-bit multiply, - * but in order to do a 64x64-bit multiply we have to simulate the - * previous behavior and then use a MACH instruction. 
- * - * FINISHME: Don't use source modifiers on src1. - */ - if (devinfo->gen >= 8) { - assert(mul->src[1].type == BRW_REGISTER_TYPE_D || - mul->src[1].type == BRW_REGISTER_TYPE_UD); - if (mul->src[1].type == BRW_REGISTER_TYPE_D) { - mul->src[1].type = BRW_REGISTER_TYPE_W; - mul->src[1].stride = 2; - } else { - mul->src[1].type = BRW_REGISTER_TYPE_UW; - mul->src[1].stride = 2; - } - } - - break; - } - case ir_binop_div: - /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */ - assert(ir->type->is_integer()); - emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); - break; - case ir_binop_carry: { - if (devinfo->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), - BRW_REGISTER_TYPE_UD); - - emit(ADDC(reg_null_ud, op[0], op[1])); - emit(MOV(this->result, fs_reg(acc))); - break; - } - case ir_binop_borrow: { - if (devinfo->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), - BRW_REGISTER_TYPE_UD); - - emit(SUBB(reg_null_ud, op[0], op[1])); - emit(MOV(this->result, fs_reg(acc))); - break; - } - case ir_binop_mod: - /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */ - assert(ir->type->is_integer()); - emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]); - break; - - case ir_binop_less: - case ir_binop_greater: - case ir_binop_lequal: - case ir_binop_gequal: - case ir_binop_equal: - case ir_binop_all_equal: - case ir_binop_nequal: - case ir_binop_any_nequal: - if (devinfo->gen <= 5) { - resolve_bool_comparison(ir->operands[0], &op[0]); - resolve_bool_comparison(ir->operands[1], &op[1]); - } - - emit(CMP(this->result, op[0], op[1], - brw_conditional_for_comparison(ir->operation))); - break; - - case ir_binop_logic_xor: - emit(XOR(this->result, op[0], op[1])); - break; - - case ir_binop_logic_or: - emit(OR(this->result, op[0], op[1])); - break; - - case ir_binop_logic_and: - emit(AND(this->result, op[0], op[1])); - break; - - case ir_binop_dot: - case ir_unop_any: - unreachable("not reached: should be handled by brw_fs_channel_expressions"); - - case ir_unop_noise: - unreachable("not reached: should be handled by lower_noise"); - - case ir_quadop_vector: - unreachable("not reached: should be handled by lower_quadop_vector"); - - case ir_binop_vector_extract: - unreachable("not reached: should be handled by lower_vec_index_to_cond_assign()"); - - case ir_triop_vector_insert: - unreachable("not reached: should be handled by lower_vector_insert()"); - - case ir_binop_ldexp: - unreachable("not reached: should be handled by ldexp_to_arith()"); - - case ir_unop_sqrt: - emit_math(SHADER_OPCODE_SQRT, this->result, op[0]); - break; - - case ir_unop_rsq: - emit_math(SHADER_OPCODE_RSQ, this->result, op[0]); - break; - - case ir_unop_bitcast_i2f: - case ir_unop_bitcast_u2f: - op[0].type = BRW_REGISTER_TYPE_F; - this->result = op[0]; - break; - case ir_unop_i2u: - case ir_unop_bitcast_f2u: - op[0].type = BRW_REGISTER_TYPE_UD; - this->result = op[0]; - break; - case ir_unop_u2i: - case ir_unop_bitcast_f2i: - op[0].type = BRW_REGISTER_TYPE_D; - this->result = op[0]; - break; - case ir_unop_i2f: - case ir_unop_u2f: - case ir_unop_f2i: - case ir_unop_f2u: - emit(MOV(this->result, op[0])); - break; - - case ir_unop_b2i: - emit(AND(this->result, op[0], fs_reg(1))); - break; - case ir_unop_b2f: - if (devinfo->gen <= 5) { - resolve_bool_comparison(ir->operands[0], &op[0]); - } - op[0].type 
= BRW_REGISTER_TYPE_D; - this->result.type = BRW_REGISTER_TYPE_D; - emit(AND(this->result, op[0], fs_reg(0x3f800000u))); - this->result.type = BRW_REGISTER_TYPE_F; - break; - - case ir_unop_f2b: - emit(CMP(this->result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); - break; - case ir_unop_i2b: - emit(CMP(this->result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); - break; - - case ir_unop_trunc: - emit(RNDZ(this->result, op[0])); - break; - case ir_unop_ceil: { - fs_reg tmp = vgrf(ir->type); - op[0].negate = !op[0].negate; - emit(RNDD(tmp, op[0])); - tmp.negate = true; - emit(MOV(this->result, tmp)); - } - break; - case ir_unop_floor: - emit(RNDD(this->result, op[0])); - break; - case ir_unop_fract: - emit(FRC(this->result, op[0])); - break; - case ir_unop_round_even: - emit(RNDE(this->result, op[0])); - break; - - case ir_binop_min: - case ir_binop_max: - resolve_ud_negate(&op[0]); - resolve_ud_negate(&op[1]); - emit_minmax(ir->operation == ir_binop_min ? - BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE, - this->result, op[0], op[1]); - break; - case ir_unop_pack_snorm_2x16: - case ir_unop_pack_snorm_4x8: - case ir_unop_pack_unorm_2x16: - case ir_unop_pack_unorm_4x8: - case ir_unop_unpack_snorm_2x16: - case ir_unop_unpack_snorm_4x8: - case ir_unop_unpack_unorm_2x16: - case ir_unop_unpack_unorm_4x8: - case ir_unop_unpack_half_2x16: - case ir_unop_pack_half_2x16: - unreachable("not reached: should be handled by lower_packing_builtins"); - case ir_unop_unpack_half_2x16_split_x: - emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, this->result, op[0]); - break; - case ir_unop_unpack_half_2x16_split_y: - emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, this->result, op[0]); - break; - case ir_binop_pow: - emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); - break; - - case ir_unop_bitfield_reverse: - emit(BFREV(this->result, op[0])); - break; - case ir_unop_bit_count: - emit(CBIT(this->result, op[0])); - break; - case ir_unop_find_msb: - temp = vgrf(glsl_type::uint_type); - emit(FBH(temp, op[0])); - - /* FBH counts from the MSB side, while GLSL's findMSB() wants the count - * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then - * subtract the result from 31 to convert the MSB count into an LSB count. - */ - - /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */ - emit(MOV(this->result, temp)); - emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ)); - - temp.negate = true; - inst = emit(ADD(this->result, temp, fs_reg(31))); - inst->predicate = BRW_PREDICATE_NORMAL; - break; - case ir_unop_find_lsb: - emit(FBL(this->result, op[0])); - break; - case ir_unop_saturate: - inst = emit(MOV(this->result, op[0])); - inst->saturate = true; - break; - case ir_triop_bitfield_extract: - /* Note that the instruction's argument order is reversed from GLSL - * and the IR. 
- */ - emit(BFE(this->result, op[2], op[1], op[0])); - break; - case ir_binop_bfm: - emit(BFI1(this->result, op[0], op[1])); - break; - case ir_triop_bfi: - emit(BFI2(this->result, op[0], op[1], op[2])); - break; - case ir_quadop_bitfield_insert: - unreachable("not reached: should be handled by " - "lower_instructions::bitfield_insert_to_bfm_bfi"); - - case ir_unop_bit_not: - emit(NOT(this->result, op[0])); - break; - case ir_binop_bit_and: - emit(AND(this->result, op[0], op[1])); - break; - case ir_binop_bit_xor: - emit(XOR(this->result, op[0], op[1])); - break; - case ir_binop_bit_or: - emit(OR(this->result, op[0], op[1])); - break; - - case ir_binop_lshift: - emit(SHL(this->result, op[0], op[1])); - break; - - case ir_binop_rshift: - if (ir->type->base_type == GLSL_TYPE_INT) - emit(ASR(this->result, op[0], op[1])); - else - emit(SHR(this->result, op[0], op[1])); - break; - case ir_binop_pack_half_2x16_split: - emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]); - break; - case ir_binop_ubo_load: { - /* This IR node takes a constant uniform block and a constant or - * variable byte offset within the block and loads a vector from that. - */ - ir_constant *const_uniform_block = ir->operands[0]->as_constant(); - ir_constant *const_offset = ir->operands[1]->as_constant(); - fs_reg surf_index; - uint32_t binding, set, index, set_index; - - if (const_uniform_block) { - /* The block index is a constant, so just emit the binding table entry - * as an immediate. - */ - index = const_uniform_block->value.u[0]; - set = shader->base.UniformBlocks[index].Set; - set_index = shader->base.UniformBlocks[index].Binding; - binding = stage_prog_data->bind_map[set].index[set_index]; - surf_index = fs_reg(binding); - } else { - assert(0 && "need more info from the ir for this."); - - /* The block index is not a constant. Evaluate the index expression - * per-channel and add the base UBO index; we have to select a value - * from any live channel. - */ - surf_index = vgrf(glsl_type::uint_type); - emit(ADD(surf_index, op[0], - fs_reg(stage_prog_data->binding_table.ubo_start))); - emit_uniformize(surf_index, surf_index); - - /* Assume this may touch any UBO. It would be nice to provide - * a tighter bound, but the array information is already lowered away. - */ - brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ubo_start + - shader_prog->NumUniformBlocks - 1); - } - - if (const_offset) { - fs_reg packed_consts = vgrf(glsl_type::float_type); - packed_consts.type = result.type; - - fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15); - emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8, - packed_consts, surf_index, const_offset_reg)); - - for (int i = 0; i < ir->type->vector_elements; i++) { - packed_consts.set_smear(const_offset->value.u[0] % 16 / 4 + i); - - /* The std140 packing rules don't allow vectors to cross 16-byte - * boundaries, and a reg is 32 bytes. - */ - assert(packed_consts.subreg_offset < 32); - - /* UBO bools are any nonzero value. We consider bools to be - * values with the low bit set to 1. Convert them using CMP. - */ - if (ir->type->base_type == GLSL_TYPE_BOOL) { - emit(CMP(result, packed_consts, fs_reg(0u), BRW_CONDITIONAL_NZ)); - } else { - emit(MOV(result, packed_consts)); - } - - result = offset(result, 1); - } - } else { - /* Turn the byte offset into a dword offset. 
*/ - fs_reg base_offset = vgrf(glsl_type::int_type); - emit(SHR(base_offset, op[1], fs_reg(2))); - - for (int i = 0; i < ir->type->vector_elements; i++) { - emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index, - base_offset, i)); - - if (ir->type->base_type == GLSL_TYPE_BOOL) - emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ)); - - result = offset(result, 1); - } - } - - result.reg_offset = 0; - break; - } - - case ir_triop_fma: - /* Note that the instruction's argument order is reversed from GLSL - * and the IR. - */ - emit(MAD(this->result, op[2], op[1], op[0])); - break; - - case ir_triop_lrp: - emit_lrp(this->result, op[0], op[1], op[2]); - break; - - case ir_triop_csel: - case ir_unop_interpolate_at_centroid: - case ir_binop_interpolate_at_offset: - case ir_binop_interpolate_at_sample: - unreachable("already handled above"); - break; - - case ir_unop_d2f: - case ir_unop_f2d: - case ir_unop_d2i: - case ir_unop_i2d: - case ir_unop_d2u: - case ir_unop_u2d: - case ir_unop_d2b: - case ir_unop_pack_double_2x32: - case ir_unop_unpack_double_2x32: - case ir_unop_frexp_sig: - case ir_unop_frexp_exp: - unreachable("fp64 todo"); - break; - } -} - -void -fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, - const glsl_type *type, bool predicated) -{ - switch (type->base_type) { - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_BOOL: - for (unsigned int i = 0; i < type->components(); i++) { - l.type = brw_type_for_base_type(type); - r.type = brw_type_for_base_type(type); - - if (predicated || !l.equals(r)) { - fs_inst *inst = emit(MOV(l, r)); - inst->predicate = predicated ? BRW_PREDICATE_NORMAL : BRW_PREDICATE_NONE; - } - - l = offset(l, 1); - r = offset(r, 1); - } - break; - case GLSL_TYPE_ARRAY: - for (unsigned int i = 0; i < type->length; i++) { - emit_assignment_writes(l, r, type->fields.array, predicated); - } - break; - - case GLSL_TYPE_STRUCT: - for (unsigned int i = 0; i < type->length; i++) { - emit_assignment_writes(l, r, type->fields.structure[i].type, - predicated); - } - break; - - case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_IMAGE: - case GLSL_TYPE_ATOMIC_UINT: - break; - - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_VOID: - case GLSL_TYPE_ERROR: - case GLSL_TYPE_INTERFACE: - case GLSL_TYPE_FUNCTION: - unreachable("not reached"); - } -} - -/* If the RHS processing resulted in an instruction generating a - * temporary value, and it would be easy to rewrite the instruction to - * generate its result right into the LHS instead, do so. This ends - * up reliably removing instructions where it can be tricky to do so - * later without real UD chain information. - */ -bool -fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, - fs_reg dst, - fs_reg src, - fs_inst *pre_rhs_inst, - fs_inst *last_rhs_inst) -{ - /* Only attempt if we're doing a direct assignment. */ - if (ir->condition || - !(ir->lhs->type->is_scalar() || - (ir->lhs->type->is_vector() && - ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1))) - return false; - - /* Make sure the last instruction generated our source reg. */ - fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst, - last_rhs_inst, - src); - if (!modify) - return false; - - /* If last_rhs_inst wrote a different number of components than our LHS, - * we can't safely rewrite it. - */ - if (alloc.sizes[dst.reg] != modify->regs_written) - return false; - - /* Success! Rewrite the instruction. 
*/ - modify->dst = dst; - - return true; -} - -void -fs_visitor::visit(ir_assignment *ir) -{ - fs_reg l, r; - fs_inst *inst; - - /* FINISHME: arrays on the lhs */ - ir->lhs->accept(this); - l = this->result; - - fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail(); - - ir->rhs->accept(this); - r = this->result; - - fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail(); - - assert(l.file != BAD_FILE); - assert(r.file != BAD_FILE); - - if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst)) - return; - - if (ir->condition) { - emit_bool_to_cond_code(ir->condition); - } - - if (ir->lhs->type->is_scalar() || - ir->lhs->type->is_vector()) { - for (int i = 0; i < ir->lhs->type->vector_elements; i++) { - if (ir->write_mask & (1 << i)) { - inst = emit(MOV(l, r)); - if (ir->condition) - inst->predicate = BRW_PREDICATE_NORMAL; - r = offset(r, 1); - } - l = offset(l, 1); - } - } else { - emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); - } -} - fs_inst * fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, fs_reg coordinate, int coord_components, @@ -1458,7 +94,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, if (shadow_c.file != BAD_FILE) { for (int i = 0; i < coord_components; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate)); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate); coordinate = offset(coordinate, 1); } @@ -1466,7 +102,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, * the unused slots must be zeroed. */ for (int i = coord_components; i < 3; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f))); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)); } mlen += 3; @@ -1474,25 +110,25 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, /* There's no plain shadow compare message, so we use shadow * compare with a bias of 0.0. */ - emit(MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f))); + bld.MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)); mlen++; } else if (op == ir_txb || op == ir_txl) { - emit(MOV(fs_reg(MRF, base_mrf + mlen), lod)); + bld.MOV(fs_reg(MRF, base_mrf + mlen), lod); mlen++; } else { unreachable("Should not get here."); } - emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c)); + bld.MOV(fs_reg(MRF, base_mrf + mlen), shadow_c); mlen++; } else if (op == ir_tex) { for (int i = 0; i < coord_components; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate)); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate); coordinate = offset(coordinate, 1); } /* zero the others. */ for (int i = coord_components; i<3; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f))); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)); } /* gen4's SIMD8 sampler always has the slots for u,v,r present. 
*/ mlen += 3; @@ -1500,7 +136,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, fs_reg &dPdx = lod; for (int i = 0; i < coord_components; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate)); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate); coordinate = offset(coordinate, 1); } /* the slots for u and v are always present, but r is optional */ @@ -1521,20 +157,20 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, * m5 m6 m7 m8 m9 m10 */ for (int i = 0; i < grad_components; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdx)); + bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdx); dPdx = offset(dPdx, 1); } mlen += MAX2(grad_components, 2); for (int i = 0; i < grad_components; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdy)); + bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdy); dPdy = offset(dPdy, 1); } mlen += MAX2(grad_components, 2); } else if (op == ir_txs) { /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */ simd16 = true; - emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod)); + bld.MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod); mlen += 2; } else { /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod @@ -1544,8 +180,8 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, assert(op == ir_txb || op == ir_txl || op == ir_txf); for (int i = 0; i < coord_components; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type), - coordinate)); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type), + coordinate); coordinate = offset(coordinate, 1); } @@ -1553,13 +189,13 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, * be necessary for TXF (ld), but seems wise to do for all messages. */ for (int i = coord_components; i < 3; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f))); + bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)); } /* lod/bias appears after u/v/r. */ mlen += 6; - emit(MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod)); + bld.MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod); mlen++; /* The unused upper half. */ @@ -1587,7 +223,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, unreachable("not reached"); } - fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); + fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler)); inst->base_mrf = base_mrf; inst->mlen = mlen; inst->header_size = 1; @@ -1595,7 +231,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, if (simd16) { for (int i = 0; i < 4; i++) { - emit(MOV(orig_dst, dst)); + bld.MOV(orig_dst, dst); orig_dst = offset(orig_dst, 1); dst = offset(dst, 2); } @@ -1621,7 +257,7 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, /* Copy the coordinates. */ for (int i = 0; i < vector_elements; i++) { - emit(MOV(retype(offset(message, i), coordinate.type), coordinate)); + bld.MOV(retype(offset(message, i), coordinate.type), coordinate); coordinate = offset(coordinate, 1); } @@ -1630,20 +266,20 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, /* Messages other than sample and ld require all three components */ if (has_lod || shadow_c.file != BAD_FILE) { for (int i = vector_elements; i < 3; i++) { - emit(MOV(offset(message, i), fs_reg(0.0f))); + bld.MOV(offset(message, i), fs_reg(0.0f)); } } if (has_lod) { fs_reg msg_lod = retype(offset(message, 3), op == ir_txf ? 
BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F); - emit(MOV(msg_lod, lod)); + bld.MOV(msg_lod, lod); msg_end = offset(msg_lod, 1); } if (shadow_c.file != BAD_FILE) { fs_reg msg_ref = offset(message, 3 + has_lod); - emit(MOV(msg_ref, shadow_c)); + bld.MOV(msg_ref, shadow_c); msg_end = offset(msg_ref, 1); } @@ -1658,7 +294,7 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, default: unreachable("not reached"); } - fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); + fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler)); inst->base_mrf = message.reg - 1; inst->mlen = msg_end.reg - inst->base_mrf; inst->header_size = 1; @@ -1698,7 +334,7 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, } for (int i = 0; i < vector_elements; i++) { - emit(MOV(retype(offset(msg_coords, i), coordinate.type), coordinate)); + bld.MOV(retype(offset(msg_coords, i), coordinate.type), coordinate); coordinate = offset(coordinate, 1); } fs_reg msg_end = offset(msg_coords, vector_elements); @@ -1706,7 +342,7 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, if (shadow_c.file != BAD_FILE) { fs_reg msg_shadow = msg_lod; - emit(MOV(msg_shadow, shadow_c)); + bld.MOV(msg_shadow, shadow_c); msg_lod = offset(msg_shadow, 1); msg_end = msg_lod; } @@ -1717,13 +353,13 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, opcode = SHADER_OPCODE_TEX; break; case ir_txb: - emit(MOV(msg_lod, lod)); + bld.MOV(msg_lod, lod); msg_end = offset(msg_lod, 1); opcode = FS_OPCODE_TXB; break; case ir_txl: - emit(MOV(msg_lod, lod)); + bld.MOV(msg_lod, lod); msg_end = offset(msg_lod, 1); opcode = SHADER_OPCODE_TXL; @@ -1740,11 +376,11 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, */ msg_end = msg_lod; for (int i = 0; i < grad_components; i++) { - emit(MOV(msg_end, lod)); + bld.MOV(msg_end, lod); lod = offset(lod, 1); msg_end = offset(msg_end, 1); - emit(MOV(msg_end, lod2)); + bld.MOV(msg_end, lod2); lod2 = offset(lod2, 1); msg_end = offset(msg_end, 1); } @@ -1754,21 +390,21 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, } case ir_txs: msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD); - emit(MOV(msg_lod, lod)); + bld.MOV(msg_lod, lod); msg_end = offset(msg_lod, 1); opcode = SHADER_OPCODE_TXS; break; case ir_query_levels: msg_lod = msg_end; - emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u))); + bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)); msg_end = offset(msg_lod, 1); opcode = SHADER_OPCODE_TXS; break; case ir_txf: msg_lod = offset(msg_coords, 3); - emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod)); + bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod); msg_end = offset(msg_lod, 1); opcode = SHADER_OPCODE_TXF; @@ -1776,9 +412,9 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, case ir_txf_ms: msg_lod = offset(msg_coords, 3); /* lod */ - emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u))); + bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)); /* sample index */ - emit(MOV(retype(offset(msg_lod, 1), BRW_REGISTER_TYPE_UD), sample_index)); + bld.MOV(retype(offset(msg_lod, 1), BRW_REGISTER_TYPE_UD), sample_index); msg_end = offset(msg_lod, 2); opcode = SHADER_OPCODE_TXF_CMS; @@ -1793,7 +429,7 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, unreachable("not reached"); } - fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); + fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler)); inst->base_mrf = message.reg; inst->mlen = 
msg_end.reg - message.reg; inst->header_size = header_size; @@ -1851,7 +487,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, } if (shadow_c.file != BAD_FILE) { - emit(MOV(sources[length], shadow_c)); + bld.MOV(sources[length], shadow_c); length++; } @@ -1874,11 +510,11 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, case ir_lod: break; case ir_txb: - emit(MOV(sources[length], lod)); + bld.MOV(sources[length], lod); length++; break; case ir_txl: - emit(MOV(sources[length], lod)); + bld.MOV(sources[length], lod); length++; break; case ir_txd: { @@ -1888,7 +524,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z */ for (int i = 0; i < coord_components; i++) { - emit(MOV(sources[length], coordinate)); + bld.MOV(sources[length], coordinate); coordinate = offset(coordinate, 1); length++; @@ -1896,11 +532,11 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, * only derivatives for (u, v, r). */ if (i < grad_components) { - emit(MOV(sources[length], lod)); + bld.MOV(sources[length], lod); lod = offset(lod, 1); length++; - emit(MOV(sources[length], lod2)); + bld.MOV(sources[length], lod2); lod2 = offset(lod2, 1); length++; } @@ -1910,11 +546,11 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, break; } case ir_txs: - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod); length++; break; case ir_query_levels: - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u))); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u)); length++; break; case ir_txf: @@ -1922,23 +558,23 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, * On Gen9 they are u, v, lod, r */ - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); coordinate = offset(coordinate, 1); length++; if (devinfo->gen >= 9) { if (coord_components >= 2) { - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); coordinate = offset(coordinate, 1); } length++; } - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod); length++; for (int i = devinfo->gen >= 9 ? 
2 : 1; i < coord_components; i++) { - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); coordinate = offset(coordinate, 1); length++; } @@ -1946,18 +582,18 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, coordinate_done = true; break; case ir_txf_ms: - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index); length++; /* data from the multisample control surface */ - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs); length++; /* there is no offsetting for this message; just copy in the integer * texture coordinates */ for (int i = 0; i < coord_components; i++) { - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); coordinate = offset(coordinate, 1); length++; } @@ -1971,19 +607,19 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, /* More crazy intermixing */ for (int i = 0; i < 2; i++) { /* u, v */ - emit(MOV(sources[length], coordinate)); + bld.MOV(sources[length], coordinate); coordinate = offset(coordinate, 1); length++; } for (int i = 0; i < 2; i++) { /* offu, offv */ - emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value)); + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value); offset_value = offset(offset_value, 1); length++; } if (coord_components == 3) { /* r if present */ - emit(MOV(sources[length], coordinate)); + bld.MOV(sources[length], coordinate); coordinate = offset(coordinate, 1); length++; } @@ -1996,7 +632,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, /* Set up the coordinate (except for cases where it was done above) */ if (!coordinate_done) { for (int i = 0; i < coord_components; i++) { - emit(MOV(sources[length], coordinate)); + bld.MOV(sources[length], coordinate); coordinate = offset(coordinate, 1); length++; } @@ -2010,7 +646,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), BRW_REGISTER_TYPE_F, dispatch_width); - emit(LOAD_PAYLOAD(src_payload, sources, length, header_size)); + bld.LOAD_PAYLOAD(src_payload, sources, length, header_size); /* Generate the SEND */ enum opcode opcode; @@ -2033,7 +669,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, default: unreachable("not reached"); } - fs_inst *inst = emit(opcode, dst, src_payload, sampler); + fs_inst *inst = bld.emit(opcode, dst, src_payload, sampler); inst->base_mrf = -1; inst->mlen = mlen; inst->header_size = header_size; @@ -2051,7 +687,6 @@ fs_reg fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, bool is_rect, uint32_t sampler, int texunit) { - fs_inst *inst = NULL; bool needs_gl_clamp = true; fs_reg scale_x, scale_y; @@ -2110,10 +745,10 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, fs_reg src = coordinate; coordinate = dst; - emit(MUL(dst, src, scale_x)); + bld.MUL(dst, src, scale_x); dst = offset(dst, 1); src = offset(src, 1); - emit(MUL(dst, src, scale_y)); + bld.MUL(dst, src, scale_y); } else if (is_rect) { /* On gen6+, the sampler handles the rectangle coordinates * natively, without needing rescaling. 
But that means we have @@ -2127,8 +762,8 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, fs_reg chan = coordinate; chan = offset(chan, i); - inst = emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f)); - inst->conditional_mod = BRW_CONDITIONAL_GE; + set_condmod(BRW_CONDITIONAL_GE, + bld.emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f))); /* Our parameter comes in as 1.0/width or 1.0/height, * because that's what people normally want for doing @@ -2137,11 +772,11 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, * parameter type, so just invert back. */ fs_reg limit = vgrf(glsl_type::float_type); - emit(MOV(limit, i == 0 ? scale_x : scale_y)); - emit(SHADER_OPCODE_RCP, limit, limit); + bld.MOV(limit, i == 0 ? scale_x : scale_y); + bld.emit(SHADER_OPCODE_RCP, limit, limit); - inst = emit(BRW_OPCODE_SEL, chan, chan, limit); - inst->conditional_mod = BRW_CONDITIONAL_L; + set_condmod(BRW_CONDITIONAL_L, + bld.emit(BRW_OPCODE_SEL, chan, chan, limit)); } } } @@ -2151,9 +786,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, if (key_tex->gl_clamp_mask[i] & (1 << sampler)) { fs_reg chan = coordinate; chan = offset(chan, i); - - fs_inst *inst = emit(MOV(chan, chan)); - inst->saturate = true; + set_saturate(true, bld.MOV(chan, chan)); } } } @@ -2173,13 +806,13 @@ fs_visitor::emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler) /* parameters are: u, v, r; missing parameters are treated as zero */ for (int i = 0; i < components; i++) { sources[i] = vgrf(glsl_type::float_type); - emit(MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate)); + bld.MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate); coordinate = offset(coordinate, 1); } - emit(LOAD_PAYLOAD(payload, sources, components, 0)); + bld.LOAD_PAYLOAD(payload, sources, components, 0); - fs_inst *inst = emit(SHADER_OPCODE_TXF_MCS, dest, payload, sampler); + fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS, dest, payload, sampler); inst->base_mrf = -1; inst->mlen = components * reg_width; inst->header_size = 0; @@ -2219,7 +852,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, this->result = res; for (int i=0; i<4; i++) { - emit(MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f))); + bld.MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 
0.0f : 1.0f)); res = offset(res, 1); } return; @@ -2276,7 +909,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, if (op == ir_txs && is_cube_array) { fs_reg depth = offset(dst, 2); fs_reg fixed_depth = vgrf(glsl_type::int_type); - emit_math(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6)); + bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6)); fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written); int components = inst->regs_written / (dst.width / 8); @@ -2287,167 +920,12 @@ fs_visitor::emit_texture(ir_texture_opcode op, fixed_payload[i] = offset(dst, i); } } - emit(LOAD_PAYLOAD(dst, fixed_payload, components, 0)); + bld.LOAD_PAYLOAD(dst, fixed_payload, components, 0); } swizzle_result(op, dest_type->vector_elements, dst, sampler); } -void -fs_visitor::visit(ir_texture *ir) -{ - uint32_t sampler; - - ir_dereference_variable *deref_var = ir->sampler->as_dereference_variable(); - assert(deref_var); - ir_variable *var = deref_var->var; - - sampler = stage_prog_data->bind_map[var->data.set].index[var->data.index]; - - ir_rvalue *nonconst_sampler_index = - _mesa_get_sampler_array_nonconst_index(ir->sampler); - - /* Handle non-constant sampler array indexing */ - fs_reg sampler_reg; - if (nonconst_sampler_index) { - /* The highest sampler which may be used by this operation is - * the last element of the array. Mark it here, because the generator - * doesn't have enough information to determine the bound. - */ - uint32_t array_size = ir->sampler->as_dereference_array() - ->array->type->array_size(); - - uint32_t max_used = sampler + array_size - 1; - if (ir->op == ir_tg4 && devinfo->gen < 8) { - max_used += stage_prog_data->binding_table.gather_texture_start; - } else { - max_used += stage_prog_data->binding_table.texture_start; - } - - brw_mark_surface_used(prog_data, max_used); - - /* Emit code to evaluate the actual indexing expression */ - nonconst_sampler_index->accept(this); - fs_reg temp = vgrf(glsl_type::uint_type); - emit(ADD(temp, this->result, fs_reg(sampler))); - emit_uniformize(temp, temp); - - sampler_reg = temp; - } else { - /* Single sampler, or constant array index; the indexing expression - * is just an immediate. - */ - sampler_reg = fs_reg(sampler); - } - - /* FINISHME: We're failing to recompile our programs when the sampler is - * updated. This only matters for the texture rectangle scale parameters - * (pre-gen6, or gen6+ with GL_CLAMP). - */ - int texunit = prog->SamplerUnits[sampler]; - - /* Should be lowered by do_lower_texture_projection */ - assert(!ir->projector); - - /* Should be lowered */ - assert(!ir->offset || !ir->offset->type->is_array()); - - /* Generate code to compute all the subexpression trees. This has to be - * done before loading any values into MRFs for the sampler message since - * generating these values may involve SEND messages that need the MRFs. - */ - fs_reg coordinate; - int coord_components = 0; - if (ir->coordinate) { - coord_components = ir->coordinate->type->vector_elements; - ir->coordinate->accept(this); - coordinate = this->result; - } - - fs_reg shadow_comparitor; - if (ir->shadow_comparitor) { - ir->shadow_comparitor->accept(this); - shadow_comparitor = this->result; - } - - fs_reg offset_value; - if (ir->offset) { - ir_constant *const_offset = ir->offset->as_constant(); - if (const_offset) { - /* Store the header bitfield in an IMM register. This allows us to - * use offset_value.file to distinguish between no offset, a constant - * offset, and a non-constant offset. 
- */ - offset_value = - fs_reg(brw_texture_offset(const_offset->value.i, - const_offset->type->vector_elements)); - } else { - ir->offset->accept(this); - offset_value = this->result; - } - } - - fs_reg lod, lod2, sample_index, mcs; - int grad_components = 0; - switch (ir->op) { - case ir_tex: - case ir_lod: - case ir_tg4: - case ir_query_levels: - break; - case ir_txb: - ir->lod_info.bias->accept(this); - lod = this->result; - break; - case ir_txd: - ir->lod_info.grad.dPdx->accept(this); - lod = this->result; - - ir->lod_info.grad.dPdy->accept(this); - lod2 = this->result; - - grad_components = ir->lod_info.grad.dPdx->type->vector_elements; - break; - case ir_txf: - case ir_txl: - case ir_txs: - ir->lod_info.lod->accept(this); - lod = this->result; - break; - case ir_txf_ms: - ir->lod_info.sample_index->accept(this); - sample_index = this->result; - - if (devinfo->gen >= 7 && - key_tex->compressed_multisample_layout_mask & (1 << sampler)) { - mcs = emit_mcs_fetch(coordinate, ir->coordinate->type->vector_elements, - sampler_reg); - } else { - mcs = fs_reg(0u); - } - break; - default: - unreachable("Unrecognized texture opcode"); - }; - - int gather_component = 0; - if (ir->op == ir_tg4) - gather_component = ir->lod_info.component->as_constant()->value.i[0]; - - bool is_rect = - ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT; - - bool is_cube_array = - ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && - ir->sampler->type->sampler_array; - - emit_texture(ir->op, ir->type, coordinate, coord_components, - shadow_comparitor, lod, lod2, grad_components, - sample_index, offset_value, mcs, - gather_component, is_cube_array, is_rect, sampler, - sampler_reg, texunit); -} - /** * Apply workarounds for Gen6 gather with UINT/SINT */ @@ -2462,16 +940,16 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst) for (int i = 0; i < 4; i++) { fs_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F); /* Convert from UNORM to UINT */ - emit(MUL(dst_f, dst_f, fs_reg((float)((1 << width) - 1)))); - emit(MOV(dst, dst_f)); + bld.MUL(dst_f, dst_f, fs_reg((float)((1 << width) - 1))); + bld.MOV(dst, dst_f); if (wa & WA_SIGN) { /* Reinterpret the UINT value as a signed INT value by * shifting the sign bit into place, then shifting back * preserving sign. 
*/ - emit(SHL(dst, dst, fs_reg(32 - width))); - emit(ASR(dst, dst, fs_reg(32 - width))); + bld.SHL(dst, dst, fs_reg(32 - width)); + bld.ASR(dst, dst, fs_reg(32 - width)); } dst = offset(dst, 1); @@ -2535,461 +1013,18 @@ fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components, l = offset(l, i); if (swiz == SWIZZLE_ZERO) { - emit(MOV(l, fs_reg(0.0f))); + bld.MOV(l, fs_reg(0.0f)); } else if (swiz == SWIZZLE_ONE) { - emit(MOV(l, fs_reg(1.0f))); + bld.MOV(l, fs_reg(1.0f)); } else { - emit(MOV(l, offset(orig_val, - GET_SWZ(key_tex->swizzles[sampler], i)))); + bld.MOV(l, offset(orig_val, + GET_SWZ(key_tex->swizzles[sampler], i))); } } this->result = swizzled_result; } } -void -fs_visitor::visit(ir_swizzle *ir) -{ - ir->val->accept(this); - fs_reg val = this->result; - - if (ir->type->vector_elements == 1) { - this->result = offset(this->result, ir->mask.x); - return; - } - - fs_reg result = vgrf(ir->type); - this->result = result; - - for (unsigned int i = 0; i < ir->type->vector_elements; i++) { - fs_reg channel = val; - int swiz = 0; - - switch (i) { - case 0: - swiz = ir->mask.x; - break; - case 1: - swiz = ir->mask.y; - break; - case 2: - swiz = ir->mask.z; - break; - case 3: - swiz = ir->mask.w; - break; - } - - emit(MOV(result, offset(channel, swiz))); - result = offset(result, 1); - } -} - -void -fs_visitor::visit(ir_discard *ir) -{ - /* We track our discarded pixels in f0.1. By predicating on it, we can - * update just the flag bits that aren't yet discarded. If there's no - * condition, we emit a CMP of g0 != g0, so all currently executing - * channels will get turned off. - */ - fs_inst *cmp; - if (ir->condition) { - emit_bool_to_cond_code(ir->condition); - cmp = (fs_inst *) this->instructions.get_tail(); - cmp->conditional_mod = brw_negate_cmod(cmp->conditional_mod); - } else { - fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0), - BRW_REGISTER_TYPE_UW)); - cmp = emit(CMP(reg_null_f, some_reg, some_reg, BRW_CONDITIONAL_NZ)); - } - cmp->predicate = BRW_PREDICATE_NORMAL; - cmp->flag_subreg = 1; - - if (devinfo->gen >= 6) { - emit_discard_jump(); - } -} - -void -fs_visitor::visit(ir_constant *ir) -{ - /* Set this->result to reg at the bottom of the function because some code - * paths will cause this visitor to be applied to other fields. This will - * cause the value stored in this->result to be modified. - * - * Make reg constant so that it doesn't get accidentally modified along the - * way. Yes, I actually had this problem. 
:( - */ - const fs_reg reg = vgrf(ir->type); - fs_reg dst_reg = reg; - - if (ir->type->is_array()) { - const unsigned size = type_size(ir->type->fields.array); - - for (unsigned i = 0; i < ir->type->length; i++) { - ir->array_elements[i]->accept(this); - fs_reg src_reg = this->result; - - dst_reg.type = src_reg.type; - for (unsigned j = 0; j < size; j++) { - emit(MOV(dst_reg, src_reg)); - src_reg = offset(src_reg, 1); - dst_reg = offset(dst_reg, 1); - } - } - } else if (ir->type->is_record()) { - foreach_in_list(ir_constant, field, &ir->components) { - const unsigned size = type_size(field->type); - - field->accept(this); - fs_reg src_reg = this->result; - - dst_reg.type = src_reg.type; - for (unsigned j = 0; j < size; j++) { - emit(MOV(dst_reg, src_reg)); - src_reg = offset(src_reg, 1); - dst_reg = offset(dst_reg, 1); - } - } - } else { - const unsigned size = type_size(ir->type); - - for (unsigned i = 0; i < size; i++) { - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - emit(MOV(dst_reg, fs_reg(ir->value.f[i]))); - break; - case GLSL_TYPE_UINT: - emit(MOV(dst_reg, fs_reg(ir->value.u[i]))); - break; - case GLSL_TYPE_INT: - emit(MOV(dst_reg, fs_reg(ir->value.i[i]))); - break; - case GLSL_TYPE_BOOL: - emit(MOV(dst_reg, fs_reg(ir->value.b[i] != 0 ? ~0 : 0))); - break; - default: - unreachable("Non-float/uint/int/bool constant"); - } - dst_reg = offset(dst_reg, 1); - } - } - - this->result = reg; -} - -void -fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) -{ - ir_expression *expr = ir->as_expression(); - - if (!expr || expr->operation == ir_binop_ubo_load) { - ir->accept(this); - - fs_inst *inst = emit(AND(reg_null_d, this->result, fs_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - return; - } - - fs_reg op[3]; - - assert(expr->get_num_operands() <= 3); - for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - assert(expr->operands[i]->type->is_scalar()); - - expr->operands[i]->accept(this); - op[i] = this->result; - - resolve_ud_negate(&op[i]); - } - - emit_bool_to_cond_code_of_reg(expr, op); -} - -void -fs_visitor::emit_bool_to_cond_code_of_reg(ir_expression *expr, fs_reg op[3]) -{ - fs_inst *inst; - - switch (expr->operation) { - case ir_unop_logic_not: - inst = emit(AND(reg_null_d, op[0], fs_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_Z; - break; - - case ir_binop_logic_xor: - if (devinfo->gen <= 5) { - fs_reg temp = vgrf(expr->type); - emit(XOR(temp, op[0], op[1])); - inst = emit(AND(reg_null_d, temp, fs_reg(1))); - } else { - inst = emit(XOR(reg_null_d, op[0], op[1])); - } - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_binop_logic_or: - if (devinfo->gen <= 5) { - fs_reg temp = vgrf(expr->type); - emit(OR(temp, op[0], op[1])); - inst = emit(AND(reg_null_d, temp, fs_reg(1))); - } else { - inst = emit(OR(reg_null_d, op[0], op[1])); - } - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_binop_logic_and: - if (devinfo->gen <= 5) { - fs_reg temp = vgrf(expr->type); - emit(AND(temp, op[0], op[1])); - inst = emit(AND(reg_null_d, temp, fs_reg(1))); - } else { - inst = emit(AND(reg_null_d, op[0], op[1])); - } - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_unop_f2b: - if (devinfo->gen >= 6) { - emit(CMP(reg_null_d, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ)); - } else { - inst = emit(MOV(reg_null_f, op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } - break; - - case ir_unop_i2b: - if (devinfo->gen >= 6) { - emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); - } else { - inst = 
emit(MOV(reg_null_d, op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } - break; - - case ir_binop_greater: - case ir_binop_gequal: - case ir_binop_less: - case ir_binop_lequal: - case ir_binop_equal: - case ir_binop_all_equal: - case ir_binop_nequal: - case ir_binop_any_nequal: - if (devinfo->gen <= 5) { - resolve_bool_comparison(expr->operands[0], &op[0]); - resolve_bool_comparison(expr->operands[1], &op[1]); - } - - emit(CMP(reg_null_d, op[0], op[1], - brw_conditional_for_comparison(expr->operation))); - break; - - case ir_triop_csel: { - /* Expand the boolean condition into the flag register. */ - inst = emit(MOV(reg_null_d, op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - - /* Select which boolean to return. */ - fs_reg temp = vgrf(expr->operands[1]->type); - inst = emit(SEL(temp, op[1], op[2])); - inst->predicate = BRW_PREDICATE_NORMAL; - - /* Expand the result to a condition code. */ - inst = emit(MOV(reg_null_d, temp)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - } - - default: - unreachable("not reached"); - } -} - -/** - * Emit a gen6 IF statement with the comparison folded into the IF - * instruction. - */ -void -fs_visitor::emit_if_gen6(ir_if *ir) -{ - ir_expression *expr = ir->condition->as_expression(); - - if (expr && expr->operation != ir_binop_ubo_load) { - fs_reg op[3]; - fs_inst *inst; - fs_reg temp; - - assert(expr->get_num_operands() <= 3); - for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - assert(expr->operands[i]->type->is_scalar()); - - expr->operands[i]->accept(this); - op[i] = this->result; - } - - switch (expr->operation) { - case ir_unop_logic_not: - emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_Z)); - return; - - case ir_binop_logic_xor: - emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ)); - return; - - case ir_binop_logic_or: - temp = vgrf(glsl_type::bool_type); - emit(OR(temp, op[0], op[1])); - emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_binop_logic_and: - temp = vgrf(glsl_type::bool_type); - emit(AND(temp, op[0], op[1])); - emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_unop_f2b: - inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - return; - - case ir_unop_i2b: - emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_binop_greater: - case ir_binop_gequal: - case ir_binop_less: - case ir_binop_lequal: - case ir_binop_equal: - case ir_binop_all_equal: - case ir_binop_nequal: - case ir_binop_any_nequal: - if (devinfo->gen <= 5) { - resolve_bool_comparison(expr->operands[0], &op[0]); - resolve_bool_comparison(expr->operands[1], &op[1]); - } - - emit(IF(op[0], op[1], - brw_conditional_for_comparison(expr->operation))); - return; - - case ir_triop_csel: { - /* Expand the boolean condition into the flag register. */ - fs_inst *inst = emit(MOV(reg_null_d, op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - - /* Select which boolean to use as the result. 
*/ - fs_reg temp = vgrf(expr->operands[1]->type); - inst = emit(SEL(temp, op[1], op[2])); - inst->predicate = BRW_PREDICATE_NORMAL; - - emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ)); - return; - } - - default: - unreachable("not reached"); - } - } - - ir->condition->accept(this); - emit(IF(this->result, fs_reg(0), BRW_CONDITIONAL_NZ)); -} - -bool -fs_visitor::try_opt_frontfacing_ternary(ir_if *ir) -{ - ir_dereference_variable *deref = ir->condition->as_dereference_variable(); - if (!deref || strcmp(deref->var->name, "gl_FrontFacing") != 0) - return false; - - if (ir->then_instructions.length() != 1 || - ir->else_instructions.length() != 1) - return false; - - ir_assignment *then_assign = - ((ir_instruction *)ir->then_instructions.head)->as_assignment(); - ir_assignment *else_assign = - ((ir_instruction *)ir->else_instructions.head)->as_assignment(); - - if (!then_assign || then_assign->condition || - !else_assign || else_assign->condition || - then_assign->write_mask != else_assign->write_mask || - !then_assign->lhs->equals(else_assign->lhs)) - return false; - - ir_constant *then_rhs = then_assign->rhs->as_constant(); - ir_constant *else_rhs = else_assign->rhs->as_constant(); - - if (!then_rhs || !else_rhs) - return false; - - if (then_rhs->type->base_type != GLSL_TYPE_FLOAT) - return false; - - if ((then_rhs->is_one() && else_rhs->is_negative_one()) || - (else_rhs->is_one() && then_rhs->is_negative_one())) { - then_assign->lhs->accept(this); - fs_reg dst = this->result; - dst.type = BRW_REGISTER_TYPE_D; - fs_reg tmp = vgrf(glsl_type::int_type); - - if (devinfo->gen >= 6) { - /* Bit 15 of g0.0 is 0 if the polygon is front facing. */ - fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W)); - - /* For (gl_FrontFacing ? 1.0 : -1.0), emit: - * - * or(8) tmp.1<2>W g0.0<0,1,0>W 0x00003f80W - * and(8) dst<1>D tmp<8,8,1>D 0xbf800000D - * - * and negate g0.0<0,1,0>W for (gl_FrontFacing ? -1.0 : 1.0). - */ - - if (then_rhs->is_negative_one()) { - assert(else_rhs->is_one()); - g0.negate = true; - } - - tmp.type = BRW_REGISTER_TYPE_W; - tmp.subreg_offset = 2; - tmp.stride = 2; - - fs_inst *or_inst = emit(OR(tmp, g0, fs_reg(0x3f80))); - or_inst->src[1].type = BRW_REGISTER_TYPE_UW; - - tmp.type = BRW_REGISTER_TYPE_D; - tmp.subreg_offset = 0; - tmp.stride = 1; - } else { - /* Bit 31 of g1.6 is 0 if the polygon is front facing. */ - fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D)); - - /* For (gl_FrontFacing ? 1.0 : -1.0), emit: - * - * or(8) tmp<1>D g1.6<0,1,0>D 0x3f800000D - * and(8) dst<1>D tmp<8,8,1>D 0xbf800000D - * - * and negate g1.6<0,1,0>D for (gl_FrontFacing ? -1.0 : 1.0). - */ - - if (then_rhs->is_negative_one()) { - assert(else_rhs->is_one()); - g1_6.negate = true; - } - - emit(OR(tmp, g1_6, fs_reg(0x3f800000))); - } - emit(AND(dst, tmp, fs_reg(0xbf800000))); - return true; - } - - return false; -} - /** * Try to replace IF/MOV/ELSE/MOV/ENDIF with SEL. 
* @@ -3056,21 +1091,21 @@ fs_visitor::try_replace_with_sel() if (src0.file == IMM) { src0 = vgrf(glsl_type::float_type); src0.type = then_mov->src[0].type; - emit(MOV(src0, then_mov->src[0])); + bld.MOV(src0, then_mov->src[0]); } - fs_inst *sel; if (if_inst->conditional_mod) { /* Sandybridge-specific IF with embedded comparison */ - emit(CMP(reg_null_d, if_inst->src[0], if_inst->src[1], - if_inst->conditional_mod)); - sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]); - sel->predicate = BRW_PREDICATE_NORMAL; + bld.CMP(bld.null_reg_d(), if_inst->src[0], if_inst->src[1], + if_inst->conditional_mod); + set_predicate(BRW_PREDICATE_NORMAL, + bld.emit(BRW_OPCODE_SEL, then_mov->dst, + src0, else_mov->src[0])); } else { /* Separate CMP and IF instructions */ - sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]); - sel->predicate = if_inst->predicate; - sel->predicate_inverse = if_inst->predicate_inverse; + set_predicate_inv(if_inst->predicate, if_inst->predicate_inverse, + bld.emit(BRW_OPCODE_SEL, then_mov->dst, + src0, else_mov->src[0])); } return true; @@ -3080,178 +1115,6 @@ fs_visitor::try_replace_with_sel() } void -fs_visitor::visit(ir_if *ir) -{ - if (try_opt_frontfacing_ternary(ir)) - return; - - /* Don't point the annotation at the if statement, because then it plus - * the then and else blocks get printed. - */ - this->base_ir = ir->condition; - - if (devinfo->gen == 6) { - emit_if_gen6(ir); - } else { - emit_bool_to_cond_code(ir->condition); - - emit(IF(BRW_PREDICATE_NORMAL)); - } - - foreach_in_list(ir_instruction, ir_, &ir->then_instructions) { - this->base_ir = ir_; - ir_->accept(this); - } - - if (!ir->else_instructions.is_empty()) { - emit(BRW_OPCODE_ELSE); - - foreach_in_list(ir_instruction, ir_, &ir->else_instructions) { - this->base_ir = ir_; - ir_->accept(this); - } - } - - emit(BRW_OPCODE_ENDIF); - - if (!try_replace_with_sel() && devinfo->gen < 6) { - no16("Can't support (non-uniform) control flow on SIMD16\n"); - } -} - -void -fs_visitor::visit(ir_loop *ir) -{ - if (devinfo->gen < 6) { - no16("Can't support (non-uniform) control flow on SIMD16\n"); - } - - this->base_ir = NULL; - emit(BRW_OPCODE_DO); - - foreach_in_list(ir_instruction, ir_, &ir->body_instructions) { - this->base_ir = ir_; - ir_->accept(this); - } - - this->base_ir = NULL; - emit(BRW_OPCODE_WHILE); -} - -void -fs_visitor::visit(ir_loop_jump *ir) -{ - switch (ir->mode) { - case ir_loop_jump::jump_break: - emit(BRW_OPCODE_BREAK); - break; - case ir_loop_jump::jump_continue: - emit(BRW_OPCODE_CONTINUE); - break; - } -} - -void -fs_visitor::visit_atomic_counter_intrinsic(ir_call *ir) -{ - ir_dereference *deref = static_cast<ir_dereference *>( - ir->actual_parameters.get_head()); - ir_variable *location = deref->variable_referenced(); - unsigned surf_index = (stage_prog_data->binding_table.abo_start + - location->data.binding); - - /* Calculate the surface offset */ - fs_reg offset = vgrf(glsl_type::uint_type); - ir_dereference_array *deref_array = deref->as_dereference_array(); - - if (deref_array) { - deref_array->array_index->accept(this); - - fs_reg tmp = vgrf(glsl_type::uint_type); - emit(MUL(tmp, this->result, fs_reg(ATOMIC_COUNTER_SIZE))); - emit(ADD(offset, tmp, fs_reg(location->data.atomic.offset))); - } else { - offset = fs_reg(location->data.atomic.offset); - } - - /* Emit the appropriate machine instruction */ - const char *callee = ir->callee->function_name(); - ir->return_deref->accept(this); - fs_reg dst = this->result; - - if (!strcmp("__intrinsic_atomic_read", callee)) 
{ - emit_untyped_surface_read(surf_index, dst, offset); - - } else if (!strcmp("__intrinsic_atomic_increment", callee)) { - emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset, - fs_reg(), fs_reg()); - - } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) { - emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset, - fs_reg(), fs_reg()); - } -} - -void -fs_visitor::visit(ir_call *ir) -{ - const char *callee = ir->callee->function_name(); - - if (!strcmp("__intrinsic_atomic_read", callee) || - !strcmp("__intrinsic_atomic_increment", callee) || - !strcmp("__intrinsic_atomic_predecrement", callee)) { - visit_atomic_counter_intrinsic(ir); - } else { - unreachable("Unsupported intrinsic."); - } -} - -void -fs_visitor::visit(ir_return *) -{ - unreachable("FINISHME"); -} - -void -fs_visitor::visit(ir_function *ir) -{ - /* Ignore function bodies other than main() -- we shouldn't see calls to - * them since they should all be inlined before we get to ir_to_mesa. - */ - if (strcmp(ir->name, "main") == 0) { - const ir_function_signature *sig; - exec_list empty; - - sig = ir->matching_signature(NULL, &empty, false); - - assert(sig); - - foreach_in_list(ir_instruction, ir_, &sig->body) { - this->base_ir = ir_; - ir_->accept(this); - } - } -} - -void -fs_visitor::visit(ir_function_signature *) -{ - unreachable("not reached"); -} - -void -fs_visitor::visit(ir_emit_vertex *) -{ - unreachable("not reached"); -} - -void -fs_visitor::visit(ir_end_primitive *) -{ - unreachable("not reached"); -} - -void fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, fs_reg dst, fs_reg offset, fs_reg src0, fs_reg src1) @@ -3263,17 +1126,16 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); /* Initialize the sample mask in the message header. */ - emit(MOV(sources[0], fs_reg(0u))) - ->force_writemask_all = true; + bld.exec_all().MOV(sources[0], fs_reg(0u)); if (stage == MESA_SHADER_FRAGMENT) { if (((brw_wm_prog_data*)this->prog_data)->uses_kill) { - emit(MOV(component(sources[0], 7), brw_flag_reg(0, 1))) - ->force_writemask_all = true; + bld.exec_all() + .MOV(component(sources[0], 7), brw_flag_reg(0, 1)); } else { - emit(MOV(component(sources[0], 7), - retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD))) - ->force_writemask_all = true; + bld.exec_all() + .MOV(component(sources[0], 7), + retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)); } } else { /* The execution mask is part of the side-band information sent together with @@ -3282,37 +1144,37 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, * the atomic operation. */ assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_COMPUTE); - emit(MOV(component(sources[0], 7), - fs_reg(0xffffu)))->force_writemask_all = true; + bld.exec_all() + .MOV(component(sources[0], 7), fs_reg(0xffffu)); } length++; /* Set the atomic operation offset. */ sources[1] = vgrf(glsl_type::uint_type); - emit(MOV(sources[1], offset)); + bld.MOV(sources[1], offset); length++; /* Set the atomic operation arguments. 
*/ if (src0.file != BAD_FILE) { sources[length] = vgrf(glsl_type::uint_type); - emit(MOV(sources[length], src0)); + bld.MOV(sources[length], src0); length++; } if (src1.file != BAD_FILE) { sources[length] = vgrf(glsl_type::uint_type); - emit(MOV(sources[length], src1)); + bld.MOV(sources[length], src1); length++; } int mlen = 1 + (length - 1) * reg_width; fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), BRW_REGISTER_TYPE_UD, dispatch_width); - emit(LOAD_PAYLOAD(src_payload, sources, length, 1)); + bld.LOAD_PAYLOAD(src_payload, sources, length, 1); /* Emit the instruction. */ - fs_inst *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_payload, - fs_reg(surf_index), fs_reg(atomic_op)); + fs_inst *inst = bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_payload, + fs_reg(surf_index), fs_reg(atomic_op)); inst->mlen = mlen; } @@ -3326,17 +1188,17 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst, sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); /* Initialize the sample mask in the message header. */ - emit(MOV(sources[0], fs_reg(0u))) - ->force_writemask_all = true; + bld.exec_all() + .MOV(sources[0], fs_reg(0u)); if (stage == MESA_SHADER_FRAGMENT) { if (((brw_wm_prog_data*)this->prog_data)->uses_kill) { - emit(MOV(component(sources[0], 7), brw_flag_reg(0, 1))) - ->force_writemask_all = true; + bld.exec_all() + .MOV(component(sources[0], 7), brw_flag_reg(0, 1)); } else { - emit(MOV(component(sources[0], 7), - retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD))) - ->force_writemask_all = true; + bld.exec_all() + .MOV(component(sources[0], 7), + retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)); } } else { /* The execution mask is part of the side-band information sent together with @@ -3345,48 +1207,25 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst, * the atomic operation. */ assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_COMPUTE); - emit(MOV(component(sources[0], 7), - fs_reg(0xffffu)))->force_writemask_all = true; + bld.exec_all() + .MOV(component(sources[0], 7), fs_reg(0xffffu)); } /* Set the surface read offset. */ sources[1] = vgrf(glsl_type::uint_type); - emit(MOV(sources[1], offset)); + bld.MOV(sources[1], offset); int mlen = 1 + reg_width; fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), BRW_REGISTER_TYPE_UD, dispatch_width); - fs_inst *inst = emit(LOAD_PAYLOAD(src_payload, sources, 2, 1)); + fs_inst *inst = bld.LOAD_PAYLOAD(src_payload, sources, 2, 1); /* Emit the instruction. */ - inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, src_payload, - fs_reg(surf_index), fs_reg(1)); + inst = bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, src_payload, + fs_reg(surf_index), fs_reg(1)); inst->mlen = mlen; } -fs_inst * -fs_visitor::emit(fs_inst *inst) -{ - if (dispatch_width == 16 && inst->exec_size == 8) - inst->force_uncompressed = true; - - inst->annotation = this->current_annotation; - inst->ir = this->base_ir; - - this->instructions.push_tail(inst); - - return inst; -} - -void -fs_visitor::emit(exec_list list) -{ - foreach_in_list_safe(fs_inst, inst, &list) { - inst->exec_node::remove(); - emit(inst); - } -} - /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ void fs_visitor::emit_dummy_fs() @@ -3396,12 +1235,12 @@ fs_visitor::emit_dummy_fs() /* Everyone's favorite color. 
*/ const float color[4] = { 1.0, 0.0, 1.0, 0.0 }; for (int i = 0; i < 4; i++) { - emit(MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F, - dispatch_width), fs_reg(color[i]))); + bld.MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F, + dispatch_width), fs_reg(color[i])); } fs_inst *write; - write = emit(FS_OPCODE_FB_WRITE); + write = bld.emit(FS_OPCODE_FB_WRITE); write->eot = true; if (devinfo->gen >= 6) { write->base_mrf = 2; @@ -3454,19 +1293,19 @@ fs_visitor::emit_interpolation_setup_gen4() { struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); - this->current_annotation = "compute pixel centers"; + fs_builder abld = bld.annotate("compute pixel centers"); this->pixel_x = vgrf(glsl_type::uint_type); this->pixel_y = vgrf(glsl_type::uint_type); this->pixel_x.type = BRW_REGISTER_TYPE_UW; this->pixel_y.type = BRW_REGISTER_TYPE_UW; - emit(ADD(this->pixel_x, + abld.ADD(this->pixel_x, fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), - fs_reg(brw_imm_v(0x10101010)))); - emit(ADD(this->pixel_y, + fs_reg(brw_imm_v(0x10101010))); + abld.ADD(this->pixel_y, fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), - fs_reg(brw_imm_v(0x11001100)))); + fs_reg(brw_imm_v(0x11001100))); - this->current_annotation = "compute pixel deltas from v0"; + abld = bld.annotate("compute pixel deltas from v0"); this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = vgrf(glsl_type::vec2_type); @@ -3475,27 +1314,27 @@ fs_visitor::emit_interpolation_setup_gen4() const fs_reg ystart(negate(brw_vec1_grf(1, 1))); if (devinfo->has_pln && dispatch_width == 16) { - emit(ADD(half(offset(delta_xy, 0), 0), half(this->pixel_x, 0), xstart)); - emit(ADD(half(offset(delta_xy, 0), 1), half(this->pixel_y, 0), ystart)); - emit(ADD(half(offset(delta_xy, 1), 0), half(this->pixel_x, 1), xstart)) - ->force_sechalf = true; - emit(ADD(half(offset(delta_xy, 1), 1), half(this->pixel_y, 1), ystart)) - ->force_sechalf = true; + for (unsigned i = 0; i < 2; i++) { + abld.half(i).ADD(half(offset(delta_xy, i), 0), + half(this->pixel_x, i), xstart); + abld.half(i).ADD(half(offset(delta_xy, i), 1), + half(this->pixel_y, i), ystart); + } } else { - emit(ADD(offset(delta_xy, 0), this->pixel_x, xstart)); - emit(ADD(offset(delta_xy, 1), this->pixel_y, ystart)); + abld.ADD(offset(delta_xy, 0), this->pixel_x, xstart); + abld.ADD(offset(delta_xy, 1), this->pixel_y, ystart); } - this->current_annotation = "compute pos.w and 1/pos.w"; + abld = bld.annotate("compute pos.w and 1/pos.w"); /* Compute wpos.w. It's always in our setup, since it's needed to * interpolate the other attributes. */ this->wpos_w = vgrf(glsl_type::float_type); - emit(FS_OPCODE_LINTERP, wpos_w, delta_xy, interp_reg(VARYING_SLOT_POS, 3)); + abld.emit(FS_OPCODE_LINTERP, wpos_w, delta_xy, + interp_reg(VARYING_SLOT_POS, 3)); /* Compute the pixel 1/W value from wpos.w. */ this->pixel_w = vgrf(glsl_type::float_type); - emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); - this->current_annotation = NULL; + abld.emit(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); } /** Emits the interpolation for the varying inputs. 
*/ @@ -3504,8 +1343,8 @@ fs_visitor::emit_interpolation_setup_gen6() { struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); - this->current_annotation = "compute pixel centers"; - if (brw->gen >= 8 || dispatch_width == 8) { + fs_builder abld = bld.annotate("compute pixel centers"); + if (devinfo->gen >= 8 || dispatch_width == 8) { /* The "Register Region Restrictions" page says for BDW (and newer, * presumably): * @@ -3518,15 +1357,15 @@ fs_visitor::emit_interpolation_setup_gen6() */ fs_reg int_pixel_xy(GRF, alloc.allocate(dispatch_width / 8), BRW_REGISTER_TYPE_UW, dispatch_width * 2); - emit(ADD(int_pixel_xy, + abld.exec_all() + .ADD(int_pixel_xy, fs_reg(stride(suboffset(g1_uw, 4), 1, 4, 0)), - fs_reg(brw_imm_v(0x11001010)))) - ->force_writemask_all = true; + fs_reg(brw_imm_v(0x11001010))); this->pixel_x = vgrf(glsl_type::float_type); this->pixel_y = vgrf(glsl_type::float_type); - emit(FS_OPCODE_PIXEL_X, this->pixel_x, int_pixel_xy); - emit(FS_OPCODE_PIXEL_Y, this->pixel_y, int_pixel_xy); + abld.emit(FS_OPCODE_PIXEL_X, this->pixel_x, int_pixel_xy); + abld.emit(FS_OPCODE_PIXEL_Y, this->pixel_y, int_pixel_xy); } else { /* The "Register Region Restrictions" page says for SNB, IVB, HSW: * @@ -3540,12 +1379,12 @@ fs_visitor::emit_interpolation_setup_gen6() fs_reg int_pixel_y = vgrf(glsl_type::uint_type); int_pixel_x.type = BRW_REGISTER_TYPE_UW; int_pixel_y.type = BRW_REGISTER_TYPE_UW; - emit(ADD(int_pixel_x, + abld.ADD(int_pixel_x, fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), - fs_reg(brw_imm_v(0x10101010)))); - emit(ADD(int_pixel_y, + fs_reg(brw_imm_v(0x10101010))); + abld.ADD(int_pixel_y, fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), - fs_reg(brw_imm_v(0x11001100)))); + fs_reg(brw_imm_v(0x11001100))); /* As of gen6, we can no longer mix float and int sources. 
We have * to turn the integer pixel centers into floats for their actual @@ -3553,21 +1392,19 @@ fs_visitor::emit_interpolation_setup_gen6() */ this->pixel_x = vgrf(glsl_type::float_type); this->pixel_y = vgrf(glsl_type::float_type); - emit(MOV(this->pixel_x, int_pixel_x)); - emit(MOV(this->pixel_y, int_pixel_y)); + abld.MOV(this->pixel_x, int_pixel_x); + abld.MOV(this->pixel_y, int_pixel_y); } - this->current_annotation = "compute pos.w"; + abld = bld.annotate("compute pos.w"); this->pixel_w = fs_reg(brw_vec8_grf(payload.source_w_reg, 0)); this->wpos_w = vgrf(glsl_type::float_type); - emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); + abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) { uint8_t reg = payload.barycentric_coord_reg[i]; this->delta_xy[i] = fs_reg(brw_vec16_grf(reg, 0)); } - - this->current_annotation = NULL; } void @@ -3581,7 +1418,7 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components, fs_reg tmp = vgrf(glsl_type::vec4_type); assert(color.type == BRW_REGISTER_TYPE_F); for (unsigned i = 0; i < components; i++) { - inst = emit(MOV(offset(tmp, i), offset(color, i))); + inst = bld.MOV(offset(tmp, i), offset(color, i)); inst->saturate = true; } color = tmp; @@ -3627,7 +1464,7 @@ fs_visitor::emit_alpha_test() { assert(stage == MESA_SHADER_FRAGMENT); brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - this->current_annotation = "Alpha test"; + const fs_builder abld = bld.annotate("Alpha test"); fs_inst *cmp; if (key->alpha_test_func == GL_ALWAYS) @@ -3637,30 +1474,29 @@ fs_visitor::emit_alpha_test() /* f0.1 = 0 */ fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); - cmp = emit(CMP(reg_null_f, some_reg, some_reg, - BRW_CONDITIONAL_NEQ)); + cmp = abld.CMP(bld.null_reg_f(), some_reg, some_reg, + BRW_CONDITIONAL_NEQ); } else { /* RT0 alpha */ fs_reg color = offset(outputs[0], 3); /* f0.1 &= func(color, ref) */ - cmp = emit(CMP(reg_null_f, color, fs_reg(key->alpha_test_ref), - cond_for_alpha_func(key->alpha_test_func))); + cmp = abld.CMP(bld.null_reg_f(), color, fs_reg(key->alpha_test_ref), + cond_for_alpha_func(key->alpha_test_func)); } cmp->predicate = BRW_PREDICATE_NORMAL; cmp->flag_subreg = 1; } fs_inst * -fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, +fs_visitor::emit_single_fb_write(const fs_builder &bld, + fs_reg color0, fs_reg color1, fs_reg src0_alpha, unsigned components, unsigned exec_size, bool use_2nd_half) { assert(stage == MESA_SHADER_FRAGMENT); brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - - this->current_annotation = "FB write header"; int header_size = 2, payload_header_size; /* We can potentially have a message length of up to 15, so we have to set @@ -3691,22 +1527,23 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, if (payload.aa_dest_stencil_reg) { sources[length] = fs_reg(GRF, alloc.allocate(1)); - emit(MOV(sources[length], - fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)))); + bld.exec_all().annotate("FB write stencil/AA alpha") + .MOV(sources[length], + fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))); length++; } prog_data->uses_omask = prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); if (prog_data->uses_omask) { - this->current_annotation = "FB write oMask"; assert(this->sample_mask.file != BAD_FILE); /* Hand over gl_SampleMask. Only lower 16 bits are relevant. 
Since * it's unsinged single words, one vgrf is always 16-wide. */ sources[length] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UW, 16); - emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask); + bld.exec_all().annotate("FB write oMask") + .emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask); length++; } @@ -3752,7 +1589,11 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { /* Hand over gl_FragDepth. */ assert(this->frag_depth.file != BAD_FILE); - sources[length] = this->frag_depth; + if (exec_size < dispatch_width) { + sources[length] = half(this->frag_depth, use_2nd_half); + } else { + sources[length] = this->frag_depth; + } } else { /* Pass through the payload depth. */ sources[length] = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)); @@ -3763,28 +1604,29 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, if (payload.dest_depth_reg) sources[length++] = fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)); + const fs_builder ubld = bld.group(exec_size, use_2nd_half); fs_inst *load; fs_inst *write; if (devinfo->gen >= 7) { /* Send from the GRF */ fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F, exec_size); - load = emit(LOAD_PAYLOAD(payload, sources, length, payload_header_size)); + load = ubld.LOAD_PAYLOAD(payload, sources, length, payload_header_size); payload.reg = alloc.allocate(load->regs_written); load->dst = payload; - write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload); + write = ubld.emit(FS_OPCODE_FB_WRITE, reg_undef, payload); write->base_mrf = -1; } else { /* Send from the MRF */ - load = emit(LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F, exec_size), - sources, length, payload_header_size)); + load = ubld.LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F, exec_size), + sources, length, payload_header_size); /* On pre-SNB, we have to interlace the color values. LOAD_PAYLOAD * will do this for us if we just give it a COMPR4 destination. 
*/ - if (brw->gen < 6 && exec_size == 16) + if (devinfo->gen < 6 && exec_size == 16) load->dst.reg |= BRW_MRF_COMPR4; - write = emit(FS_OPCODE_FB_WRITE); + write = ubld.emit(FS_OPCODE_FB_WRITE); write->exec_size = exec_size; write->base_mrf = 1; } @@ -3807,10 +1649,10 @@ fs_visitor::emit_fb_writes() fs_inst *inst = NULL; if (do_dual_src) { - this->current_annotation = ralloc_asprintf(this->mem_ctx, - "FB dual-source write"); - inst = emit_single_fb_write(this->outputs[0], this->dual_src_output, - reg_undef, 4, 8); + const fs_builder abld = bld.annotate("FB dual-source write"); + + inst = emit_single_fb_write(abld, this->outputs[0], + this->dual_src_output, reg_undef, 4, 8); inst->target = 0; /* SIMD16 dual source blending requires to send two SIMD8 dual source @@ -3831,8 +1673,9 @@ fs_visitor::emit_fb_writes() * m + 3: a1 */ if (dispatch_width == 16) { - inst = emit_single_fb_write(this->outputs[0], this->dual_src_output, - reg_undef, 4, 8, true); + inst = emit_single_fb_write(abld, this->outputs[0], + this->dual_src_output, reg_undef, 4, 8, + true); inst->target = 0; } @@ -3843,14 +1686,14 @@ fs_visitor::emit_fb_writes() if (this->outputs[target].file == BAD_FILE) continue; - this->current_annotation = ralloc_asprintf(this->mem_ctx, - "FB write target %d", - target); + const fs_builder abld = bld.annotate( + ralloc_asprintf(this->mem_ctx, "FB write target %d", target)); + fs_reg src0_alpha; if (devinfo->gen >= 6 && key->replicate_alpha && target != 0) src0_alpha = offset(outputs[0], 3); - inst = emit_single_fb_write(this->outputs[target], reg_undef, + inst = emit_single_fb_write(abld, this->outputs[target], reg_undef, src0_alpha, this->output_components[target], dispatch_width); @@ -3863,19 +1706,17 @@ fs_visitor::emit_fb_writes() * alpha out the pipeline to our null renderbuffer to support * alpha-testing, alpha-to-coverage, and so on. 
*/ - inst = emit_single_fb_write(reg_undef, reg_undef, reg_undef, 0, + inst = emit_single_fb_write(bld, reg_undef, reg_undef, reg_undef, 0, dispatch_width); inst->target = 0; } inst->eot = true; - this->current_annotation = NULL; } void -fs_visitor::setup_uniform_clipplane_values() +fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes) { - gl_clip_plane *clip_planes = brw_select_clip_planes(ctx); const struct brw_vue_prog_key *key = (const struct brw_vue_prog_key *) this->key; @@ -3889,7 +1730,7 @@ fs_visitor::setup_uniform_clipplane_values() } } -void fs_visitor::compute_clip_distance() +void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes) { struct brw_vue_prog_data *vue_prog_data = (struct brw_vue_prog_data *) prog_data; @@ -3918,9 +1759,9 @@ void fs_visitor::compute_clip_distance() if (outputs[clip_vertex].file == BAD_FILE) return; - setup_uniform_clipplane_values(); + setup_uniform_clipplane_values(clip_planes); - current_annotation = "user clip distances"; + const fs_builder abld = bld.annotate("user clip distances"); this->outputs[VARYING_SLOT_CLIP_DIST0] = vgrf(glsl_type::vec4_type); this->outputs[VARYING_SLOT_CLIP_DIST1] = vgrf(glsl_type::vec4_type); @@ -3930,16 +1771,16 @@ void fs_visitor::compute_clip_distance() fs_reg output = outputs[VARYING_SLOT_CLIP_DIST0 + i / 4]; output.reg_offset = i & 3; - emit(MUL(output, outputs[clip_vertex], u)); + abld.MUL(output, outputs[clip_vertex], u); for (int j = 1; j < 4; j++) { u.reg = userplane[i].reg + j; - emit(MAD(output, output, offset(outputs[clip_vertex], j), u)); + abld.MAD(output, output, offset(outputs[clip_vertex], j), u); } } } void -fs_visitor::emit_urb_writes() +fs_visitor::emit_urb_writes(gl_clip_plane *clip_planes) { int slot, urb_offset, length; struct brw_vs_prog_data *vs_prog_data = @@ -3954,18 +1795,17 @@ fs_visitor::emit_urb_writes() /* Lower legacy ff and ClipVertex clipping to clip distances */ if (key->base.userclip_active && !prog->UsesClipDistanceOut) - compute_clip_distance(); + compute_clip_distance(clip_planes); /* If we don't have any valid slots to write, just do a minimal urb write * send to terminate the shader. 
*/ if (vue_map->slots_valid == 0) { fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); - fs_inst *inst = emit(MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0), - BRW_REGISTER_TYPE_UD)))); - inst->force_writemask_all = true; + bld.exec_all().MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0), + BRW_REGISTER_TYPE_UD))); - inst = emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); + fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); inst->eot = true; inst->mlen = 1; inst->offset = 1; @@ -3994,7 +1834,7 @@ fs_visitor::emit_urb_writes() } zero = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); - emit(MOV(zero, fs_reg(0u))); + bld.MOV(zero, fs_reg(0u)); sources[length++] = zero; if (vue_map->slots_valid & VARYING_BIT_LAYER) @@ -4049,8 +1889,7 @@ fs_visitor::emit_urb_writes() for (int i = 0; i < 4; i++) { reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type); src = offset(this->outputs[varying], i); - fs_inst *inst = emit(MOV(reg, src)); - inst->saturate = true; + set_saturate(true, bld.MOV(reg, src)); sources[length++] = reg; } } else { @@ -4060,7 +1899,7 @@ fs_visitor::emit_urb_writes() break; } - current_annotation = "URB write"; + const fs_builder abld = bld.annotate("URB write"); /* If we've queued up 8 registers of payload (2 VUE slots), if this is * the last slot or if we need to flush (see BAD_FILE varying case @@ -4073,22 +1912,14 @@ fs_visitor::emit_urb_writes() fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1); fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1), BRW_REGISTER_TYPE_F, dispatch_width); - - /* We need WE_all on the MOV for the message header (the URB handles) - * so do a MOV to a dummy register and set force_writemask_all on the - * MOV. LOAD_PAYLOAD will preserve that. - */ - fs_reg dummy = fs_reg(GRF, alloc.allocate(1), - BRW_REGISTER_TYPE_UD); - fs_inst *inst = emit(MOV(dummy, fs_reg(retype(brw_vec8_grf(1, 0), - BRW_REGISTER_TYPE_UD)))); - inst->force_writemask_all = true; - payload_sources[0] = dummy; + payload_sources[0] = + fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); memcpy(&payload_sources[1], sources, length * sizeof sources[0]); - emit(LOAD_PAYLOAD(payload, payload_sources, length + 1, 1)); + abld.LOAD_PAYLOAD(payload, payload_sources, length + 1, 1); - inst = emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); + fs_inst *inst = + abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); inst->eot = last; inst->mlen = length + 1; inst->offset = urb_offset; @@ -4100,21 +1931,9 @@ fs_visitor::emit_urb_writes() } void -fs_visitor::resolve_ud_negate(fs_reg *reg) -{ - if (reg->type != BRW_REGISTER_TYPE_UD || - !reg->negate) - return; - - fs_reg temp = vgrf(glsl_type::uint_type); - emit(MOV(temp, *reg)); - *reg = temp; -} - -void fs_visitor::emit_cs_terminate() { - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); /* We are getting the thread ID from the compute shader header */ assert(stage == MESA_SHADER_COMPUTE); @@ -4125,94 +1944,53 @@ fs_visitor::emit_cs_terminate() */ struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD); fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); - fs_inst *inst = emit(MOV(payload, g0)); - inst->force_writemask_all = true; + bld.exec_all().MOV(payload, g0); /* Send a message to the thread spawner to terminate the thread. 
*/ - inst = emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload); + fs_inst *inst = bld.exec_all() + .emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload); inst->eot = true; } -/** - * Resolve the result of a Gen4-5 CMP instruction to a proper boolean. - * - * CMP on Gen4-5 only sets the LSB of the result; the rest are undefined. - * If we need a proper boolean value, we have to fix it up to be 0 or ~0. - */ void -fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg) +fs_visitor::emit_barrier() { - assert(devinfo->gen <= 5); + assert(devinfo->gen >= 7); - if (rvalue->type != glsl_type::bool_type) - return; + /* We are getting the barrier ID from the compute shader header */ + assert(stage == MESA_SHADER_COMPUTE); - fs_reg and_result = vgrf(glsl_type::bool_type); - fs_reg neg_result = vgrf(glsl_type::bool_type); - emit(AND(and_result, *reg, fs_reg(1))); - emit(MOV(neg_result, negate(and_result))); - *reg = neg_result; -} + fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); -fs_visitor::fs_visitor(struct brw_context *brw, - void *mem_ctx, - const struct brw_wm_prog_key *key, - struct brw_wm_prog_data *prog_data, - struct gl_shader_program *shader_prog, - struct gl_fragment_program *fp, - unsigned dispatch_width) - : backend_visitor(brw, shader_prog, &fp->Base, &prog_data->base, - MESA_SHADER_FRAGMENT), - reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)), - reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)), - reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)), - key(key), prog_data(&prog_data->base), - dispatch_width(dispatch_width), promoted_constants(0) -{ - this->mem_ctx = mem_ctx; - init(); -} + /* Clear the message payload */ + bld.exec_all().MOV(payload, fs_reg(0u)); -fs_visitor::fs_visitor(struct brw_context *brw, - void *mem_ctx, - const struct brw_vs_prog_key *key, - struct brw_vs_prog_data *prog_data, - struct gl_shader_program *shader_prog, - struct gl_vertex_program *cp, - unsigned dispatch_width) - : backend_visitor(brw, shader_prog, &cp->Base, &prog_data->base.base, - MESA_SHADER_VERTEX), - reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)), - reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)), - reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)), - key(key), prog_data(&prog_data->base.base), - dispatch_width(dispatch_width), promoted_constants(0) -{ - this->mem_ctx = mem_ctx; - init(); + /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */ + fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)); + bld.exec_all().AND(component(payload, 2), r0_2, fs_reg(0x0f000000u)); + + /* Emit a gateway "barrier" message using the payload we set up, followed + * by a wait instruction. 
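+ * (Editorial note, illustrative cross-reference: SHADER_OPCODE_BARRIER is
+ * also added to backend_instruction::has_side_effects() later in this
+ * patch, which presumably keeps dead-code elimination and similar passes
+ * from dropping or migrating the barrier even though it writes no
+ * destination.)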
+ */ + bld.exec_all().emit(SHADER_OPCODE_BARRIER, reg_undef, payload); } -fs_visitor::fs_visitor(struct brw_context *brw, +fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, - const struct brw_cs_prog_key *key, - struct brw_cs_prog_data *prog_data, + gl_shader_stage stage, + const void *key, + struct brw_stage_prog_data *prog_data, struct gl_shader_program *shader_prog, - struct gl_compute_program *cp, - unsigned dispatch_width) - : backend_visitor(brw, shader_prog, &cp->Base, &prog_data->base, - MESA_SHADER_COMPUTE), - reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)), - reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)), - reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)), - key(key), prog_data(&prog_data->base), - dispatch_width(dispatch_width) -{ - this->mem_ctx = mem_ctx; - init(); -} - -void -fs_visitor::init() + struct gl_program *prog, + unsigned dispatch_width, + int shader_time_index) + : backend_shader(compiler, log_data, mem_ctx, + shader_prog, prog, prog_data, stage), + key(key), prog_data(prog_data), + dispatch_width(dispatch_width), + shader_time_index(shader_time_index), + promoted_constants(0), + bld(fs_builder(this, dispatch_width).at_end()) { switch (stage) { case MESA_SHADER_FRAGMENT: @@ -4232,9 +2010,6 @@ fs_visitor::init() this->failed = false; this->simd16_unsupported = false; this->no16_msg = NULL; - this->variable_ht = hash_table_ctor(0, - hash_table_pointer_hash, - hash_table_pointer_compare); this->nir_locals = NULL; this->nir_globals = NULL; @@ -4247,9 +2022,6 @@ fs_visitor::init() this->first_non_payload_grf = 0; this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; - this->current_annotation = NULL; - this->base_ir = NULL; - this->virtual_grf_start = NULL; this->virtual_grf_end = NULL; this->live_intervals = NULL; @@ -4269,5 +2041,4 @@ fs_visitor::init() fs_visitor::~fs_visitor() { - hash_table_dtor(this->variable_ht); } diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c index a323e4d9031..0b8bfc3d9bd 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c @@ -47,11 +47,12 @@ brw_upload_gs_pull_constants(struct brw_context *brw) return; /* BRW_NEW_GS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = &brw->gs.prog_data->base.base; + const struct brw_vue_prog_data *prog_data = &brw->gs.prog_data->base; + const bool dword_pitch = prog_data->dispatch_mode == DISPATCH_MODE_SIMD8; /* _NEW_PROGRAM_CONSTANTS */ brw_upload_pull_constants(brw, BRW_NEW_GS_CONSTBUF, &gp->program.Base, - stage_state, prog_data, false); + stage_state, &prog_data->base, dword_pitch); } const struct brw_tracked_state brw_gs_pull_constants = { @@ -77,8 +78,11 @@ brw_upload_gs_ubo_surfaces(struct brw_context *brw) return; /* BRW_NEW_GS_PROG_DATA */ + struct brw_vue_prog_data *prog_data = &brw->gs.prog_data->base; + bool dword_pitch = prog_data->dispatch_mode == DISPATCH_MODE_SIMD8; + brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY], - &brw->gs.base, &brw->gs.prog_data->base.base, false); + &brw->gs.base, &prog_data->base, dword_pitch); } const struct brw_tracked_state brw_gs_ubo_surfaces = { diff --git a/src/mesa/drivers/dri/i965/brw_inst.h b/src/mesa/drivers/dri/i965/brw_inst.h index e347c518348..7a8c210118c 100644 --- a/src/mesa/drivers/dri/i965/brw_inst.h +++ b/src/mesa/drivers/dri/i965/brw_inst.h @@ -322,6 +322,9 @@ 
FJ(gen4_jump_count, 111, 96, devinfo->gen < 6) FC(gen4_pop_count, 115, 112, devinfo->gen < 6) /** @} */ +/* Message descriptor bits */ +#define MD(x) ((x) + 96) + /** * Fields for SEND messages: * @{ @@ -347,6 +350,7 @@ FF(header_present, /* 6: */ 115, 115, /* 7: */ 115, 115, /* 8: */ 115, 115) +F(gateway_notify, MD(16), MD(15)) FF(function_control, /* 4: */ 111, 96, /* 4.5: */ 111, 96, @@ -354,6 +358,13 @@ FF(function_control, /* 6: */ 114, 96, /* 7: */ 114, 96, /* 8: */ 114, 96) +FF(gateway_subfuncid, + /* 4: */ MD(1), MD(0), + /* 4.5: */ MD(1), MD(0), + /* 5: */ MD(1), MD(0), /* 2:0, but bit 2 is reserved MBZ */ + /* 6: */ MD(2), MD(0), + /* 7: */ MD(2), MD(0), + /* 8: */ MD(2), MD(0)) FF(sfid, /* 4: */ 123, 120, /* called msg_target */ /* 4.5 */ 123, 120, @@ -364,9 +375,6 @@ FF(sfid, FC(base_mrf, 27, 24, devinfo->gen < 6); /** @} */ -/* Message descriptor bits */ -#define MD(x) (x + 96) - /** * URB message function control bits: * @{ diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index f3dfe790f34..96dc20da3cf 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -131,14 +131,15 @@ horiz_offset(fs_reg reg, unsigned delta) static inline fs_reg offset(fs_reg reg, unsigned delta) { - assert(reg.stride > 0); switch (reg.file) { case BAD_FILE: break; case GRF: case MRF: case ATTR: - return byte_offset(reg, delta * reg.width * reg.stride * type_sz(reg.type)); + return byte_offset(reg, + delta * MAX2(reg.width * reg.stride, 1) * + type_sz(reg.type)); case UNIFORM: reg.reg_offset += delta; break; @@ -155,6 +156,7 @@ component(fs_reg reg, unsigned idx) assert(idx < reg.width); reg.subreg_offset = idx * type_sz(reg.type); reg.width = 1; + reg.stride = 0; return reg; } @@ -254,9 +256,62 @@ public: uint8_t exec_size; bool eot:1; - bool force_uncompressed:1; bool force_sechalf:1; bool pi_noperspective:1; /**< Pixel interpolator noperspective flag */ }; +/** + * Set second-half quarter control on \p inst. + */ +static inline fs_inst * +set_sechalf(fs_inst *inst) +{ + inst->force_sechalf = true; + return inst; +} + +/** + * Make the execution of \p inst dependent on the evaluation of a possibly + * inverted predicate. + */ +static inline fs_inst * +set_predicate_inv(enum brw_predicate pred, bool inverse, + fs_inst *inst) +{ + inst->predicate = pred; + inst->predicate_inverse = inverse; + return inst; +} + +/** + * Make the execution of \p inst dependent on the evaluation of a predicate. + */ +static inline fs_inst * +set_predicate(enum brw_predicate pred, fs_inst *inst) +{ + return set_predicate_inv(pred, false, inst); +} + +/** + * Write the result of evaluating the condition given by \p mod to a flag + * register. + */ +static inline fs_inst * +set_condmod(enum brw_conditional_mod mod, fs_inst *inst) +{ + inst->conditional_mod = mod; + return inst; +} + +/** + * Clamp the result of \p inst to the saturation range of its destination + * datatype. + */ +static inline fs_inst * +set_saturate(bool saturate, fs_inst *inst) +{ + inst->saturate = saturate; + return inst; +} + #endif diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index a56fdd6fce9..fceacae0e51 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -190,6 +190,50 @@ public: } }; +/** + * Make the execution of \p inst dependent on the evaluation of a possibly + * inverted predicate. 
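+ * (Editorial note, illustrative usage mirroring the fs_inst helpers above:
+ * these wrappers are meant to be composed around the call that builds the
+ * instruction, as in the pattern used elsewhere in this patch,
+ *
+ *    set_saturate(true, bld.MOV(reg, src));
+ *
+ * so the flag is applied to the freshly created instruction in a single
+ * expression instead of emitting it first and patching the result.)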
+ */ +inline vec4_instruction * +set_predicate_inv(enum brw_predicate pred, bool inverse, + vec4_instruction *inst) +{ + inst->predicate = pred; + inst->predicate_inverse = inverse; + return inst; +} + +/** + * Make the execution of \p inst dependent on the evaluation of a predicate. + */ +inline vec4_instruction * +set_predicate(enum brw_predicate pred, vec4_instruction *inst) +{ + return set_predicate_inv(pred, false, inst); +} + +/** + * Write the result of evaluating the condition given by \p mod to a flag + * register. + */ +inline vec4_instruction * +set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst) +{ + inst->conditional_mod = mod; + return inst; +} + +/** + * Clamp the result of \p inst to the saturation range of its destination + * datatype. + */ +inline vec4_instruction * +set_saturate(bool saturate, vec4_instruction *inst) +{ + inst->saturate = saturate; + return inst; +} + } /* namespace brw */ #endif diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp index 0424003ffd5..7a5f9834423 100644 --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp @@ -89,19 +89,18 @@ txs_type(const glsl_type *type) ir_visitor_status lower_texture_grad_visitor::visit_leave(ir_texture *ir) { - /* Only lower textureGrad with shadow samplers */ - if (ir->op != ir_txd || !ir->shadow_comparitor) + /* Only lower textureGrad with cube maps or shadow samplers */ + if (ir->op != ir_txd || + (ir->sampler->type->sampler_dimensionality != GLSL_SAMPLER_DIM_CUBE && + !ir->shadow_comparitor)) return visit_continue; - /* Lower textureGrad() with samplerCubeShadow even if we have the sample_d_c + /* Lower textureGrad() with samplerCube* even if we have the sample_d_c * message. GLSL provides gradients for the 'r' coordinate. Unfortunately: * * From the Ivybridge PRM, Volume 4, Part 1, sample_d message description: * "The r coordinate contains the faceid, and the r gradients are ignored * by hardware." - * - * We likely need to do a similar treatment for samplerCube and - * samplerCubeArray, but we have insufficient testing for that at the moment. */ bool need_lowering = !has_sample_d_c || ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE; @@ -155,9 +154,20 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir) expr(ir_unop_sqrt, dot(dPdy, dPdy))); } - /* lambda_base = log2(rho). We're ignoring GL state biases for now. */ + /* lambda_base = log2(rho). We're ignoring GL state biases for now. + * + * For cube maps the result of these formulas is giving us a value of rho + * that is twice the value we should use, so divide it by 2 or, + * alternatively, remove one unit from the result of the log2 computation. 
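+ * (Editorial note, worked out for clarity: removing one unit after the log
+ * is the same correction as halving rho first, since
+ * log2(rho) - 1 == log2(rho / 2); the ir_binop_add with the -1.0f constant
+ * below implements exactly that for the cube-map case.)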
+ */ ir->op = ir_txl; - ir->lod_info.lod = expr(ir_unop_log2, rho); + if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { + ir->lod_info.lod = expr(ir_binop_add, + expr(ir_unop_log2, rho), + new(mem_ctx) ir_constant(-1.0f)); + } else { + ir->lod_info.lod = expr(ir_unop_log2, rho); + } progress = true; return visit_continue; diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index 06916e28cbd..49f2e3e498c 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -339,8 +339,13 @@ is_color_fast_clear_compatible(struct brw_context *brw, mesa_format format, const union gl_color_union *color) { - if (_mesa_is_format_integer_color(format)) + if (_mesa_is_format_integer_color(format)) { + if (brw->gen >= 8) { + perf_debug("Integer fast clear not enabled for (%s)", + _mesa_get_format_name(format)); + } return false; + } for (int i = 0; i < 4; i++) { if (color->f[i] != 0.0 && color->f[i] != 1.0 && @@ -466,7 +471,8 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, * linear (untiled) memory is UNDEFINED." */ if (irb->mt->tiling == I915_TILING_NONE) { - perf_debug("falling back to plain clear because buffers are untiled\n"); + perf_debug("Falling back to plain clear because %dx%d buffer is untiled\n", + irb->mt->logical_width0, irb->mt->logical_height0); clear_type = PLAIN_CLEAR; } @@ -477,7 +483,8 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, for (int i = 0; i < 4; i++) { if (_mesa_format_has_color_component(irb->mt->format, i) && !color_mask[i]) { - perf_debug("falling back to plain clear because of color mask\n"); + perf_debug("Falling back to plain clear on %dx%d buffer because of color mask\n", + irb->mt->logical_width0, irb->mt->logical_height0); clear_type = PLAIN_CLEAR; } } diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c index fc7018d15b9..d079197a2a9 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c +++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c @@ -414,6 +414,12 @@ brw_meta_stencil_blit(struct brw_context *brw, GLenum target; _mesa_meta_fb_tex_blit_begin(ctx, &blit); + /* XXX: Pretend to support stencil textures so _mesa_base_tex_format() + * returns a valid format. When we properly support the extension, we + * should remove this. + */ + assert(ctx->Extensions.ARB_texture_stencil8 == false); + ctx->Extensions.ARB_texture_stencil8 = true; _mesa_GenFramebuffers(1, &fbo); /* Force the surface to be configured for level zero. 
*/ @@ -451,6 +457,7 @@ brw_meta_stencil_blit(struct brw_context *brw, _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); error: + ctx->Extensions.ARB_texture_stencil8 = false; _mesa_meta_fb_tex_blit_end(ctx, target, &blit); _mesa_meta_end(ctx); diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 67a693b5ec1..5a4515b582d 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -39,6 +39,7 @@ #include "brw_state.h" #include "brw_defines.h" +#include "main/framebuffer.h" #include "main/fbobject.h" #include "main/glformats.h" @@ -46,12 +47,14 @@ static void upload_drawing_rect(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; + const struct gl_framebuffer *fb = ctx->DrawBuffer; + const unsigned int fb_width = _mesa_geometric_width(fb); + const unsigned int fb_height = _mesa_geometric_height(fb); BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); OUT_BATCH(0); /* xmin, ymin */ - OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | - ((ctx->DrawBuffer->Height - 1) << 16)); + OUT_BATCH(((fb_width - 1) & 0xffff) | ((fb_height - 1) << 16)); OUT_BATCH(0); ADVANCE_BATCH(); } @@ -767,7 +770,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) * works just fine, and there's no window system to worry about. */ if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) - OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31); + OUT_BATCH((32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31); else OUT_BATCH(0); ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index e4119b1aa3f..b7bb2315b97 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -122,18 +122,9 @@ brw_create_nir(struct brw_context *brw, /* Get rid of split copies */ nir_optimize(nir); - if (shader_prog) { - nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms, - &nir->num_direct_uniforms, - &nir->num_uniforms); - } else { - /* ARB programs generally create a giant array of "uniform" data, and allow - * indirect addressing without any boundaries. In the absence of bounds - * analysis, it's all or nothing. num_direct_uniforms is only useful when - * we have some direct and some indirect access; it doesn't matter here. - */ - nir->num_direct_uniforms = 0; - } + nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms, + &nir->num_direct_uniforms, + &nir->num_uniforms); nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs); nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs); @@ -176,6 +167,12 @@ brw_create_nir(struct brw_context *brw, nir_validate_shader(nir); if (unlikely(debug_enabled)) { + /* Re-index SSA defs so we print more sensible numbers. 
*/ + nir_foreach_overload(nir, overload) { + if (overload->impl) + nir_index_ssa_defs(overload->impl); + } + fprintf(stderr, "NIR (SSA form) for %s shader:\n", _mesa_shader_stage_to_string(stage)); nir_print_shader(nir, stderr); diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index b056fbfc427..ea128ccb670 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -88,7 +88,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx, return NULL; } - case MESA_GEOMETRY_PROGRAM: { + case GL_GEOMETRY_PROGRAM_NV: { struct brw_geometry_program *prog = CALLOC_STRUCT(brw_geometry_program); if (prog) { prog->id = get_new_program_id(brw->intelScreen); @@ -287,18 +287,24 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ) functions->MemoryBarrier = brw_memory_barrier; } +struct shader_times { + uint64_t time; + uint64_t written; + uint64_t reset; +}; + void brw_init_shader_time(struct brw_context *brw) { - const int max_entries = 4096; - brw->shader_time.bo = drm_intel_bo_alloc(brw->bufmgr, "shader time", - max_entries * SHADER_TIME_STRIDE, - 4096); + const int max_entries = 2048; + brw->shader_time.bo = + drm_intel_bo_alloc(brw->bufmgr, "shader time", + max_entries * SHADER_TIME_STRIDE * 3, 4096); brw->shader_time.names = rzalloc_array(brw, const char *, max_entries); brw->shader_time.ids = rzalloc_array(brw, int, max_entries); brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type, max_entries); - brw->shader_time.cumulative = rzalloc_array(brw, uint64_t, + brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times, max_entries); brw->shader_time.max_entries = max_entries; } @@ -319,27 +325,6 @@ compare_time(const void *a, const void *b) } static void -get_written_and_reset(struct brw_context *brw, int i, - uint64_t *written, uint64_t *reset) -{ - enum shader_time_shader_type type = brw->shader_time.types[i]; - assert(type == ST_VS || type == ST_GS || type == ST_FS8 || - type == ST_FS16 || type == ST_CS); - - /* Find where we recorded written and reset. */ - int wi, ri; - - for (wi = i; brw->shader_time.types[wi] != type + 1; wi++) - ; - - for (ri = i; brw->shader_time.types[ri] != type + 2; ri++) - ; - - *written = brw->shader_time.cumulative[wi]; - *reset = brw->shader_time.cumulative[ri]; -} - -static void print_shader_time_line(const char *stage, const char *name, int shader_num, uint64_t time, uint64_t total) { @@ -374,26 +359,13 @@ brw_report_shader_time(struct brw_context *brw) sorted[i] = &scaled[i]; switch (type) { - case ST_VS_WRITTEN: - case ST_VS_RESET: - case ST_GS_WRITTEN: - case ST_GS_RESET: - case ST_FS8_WRITTEN: - case ST_FS8_RESET: - case ST_FS16_WRITTEN: - case ST_FS16_RESET: - case ST_CS_WRITTEN: - case ST_CS_RESET: - /* We'll handle these when along with the time. */ - scaled[i] = 0; - continue; - case ST_VS: case ST_GS: case ST_FS8: case ST_FS16: case ST_CS: - get_written_and_reset(brw, i, &written, &reset); + written = brw->shader_time.cumulative[i].written; + reset = brw->shader_time.cumulative[i].reset; break; default: @@ -405,7 +377,7 @@ brw_report_shader_time(struct brw_context *brw) break; } - uint64_t time = brw->shader_time.cumulative[i]; + uint64_t time = brw->shader_time.cumulative[i].time; if (written) { scaled[i] = time / written * (written + reset); } else { @@ -491,16 +463,19 @@ brw_collect_shader_time(struct brw_context *brw) * overhead compared to the cost of tracking the time in the first place. 
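 * (Editorial note on the layout introduced above, inferred from
 * brw_init_shader_time() and the loop below: each entry now owns three
 * consecutive SHADER_TIME_STRIDE-sized slots -- time, written, reset -- so
 * entry i begins at byte offset i * 3 * SHADER_TIME_STRIDE in the BO.)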
*/ drm_intel_bo_map(brw->shader_time.bo, true); - - uint32_t *times = brw->shader_time.bo->virtual; + void *bo_map = brw->shader_time.bo->virtual; for (int i = 0; i < brw->shader_time.num_entries; i++) { - brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4]; + uint32_t *times = bo_map + i * 3 * SHADER_TIME_STRIDE; + + brw->shader_time.cumulative[i].time += times[SHADER_TIME_STRIDE * 0 / 4]; + brw->shader_time.cumulative[i].written += times[SHADER_TIME_STRIDE * 1 / 4]; + brw->shader_time.cumulative[i].reset += times[SHADER_TIME_STRIDE * 2 / 4]; } /* Zero the BO out to clear it out for our next collection. */ - memset(times, 0, brw->shader_time.bo->size); + memset(bo_map, 0, brw->shader_time.bo->size); drm_intel_bo_unmap(brw->shader_time.bo); } diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index 667c9009304..aea4d9b77d3 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -66,10 +66,20 @@ brw_write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx) void brw_write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx) { - brw_emit_pipe_control_write(brw, - PIPE_CONTROL_WRITE_DEPTH_COUNT - | PIPE_CONTROL_DEPTH_STALL, - query_bo, idx * sizeof(uint64_t), 0, 0); + uint32_t flags; + + flags = (PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_DEPTH_STALL); + + /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM + * command when loading the values into the predicate source registers for + * conditional rendering. + */ + if (brw->predicate.supported) + flags |= PIPE_CONTROL_FLUSH_ENABLE; + + brw_emit_pipe_control_write(brw, flags, query_bo, + idx * sizeof(uint64_t), 0, 0); } /** diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index c03a8aed796..c8b134103bb 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -765,6 +765,22 @@ brw_ip_reg(void) } static inline struct brw_reg +brw_notification_reg(void) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NOTIFICATION_COUNT, + 0, + 0, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_X); +} + +static inline struct brw_reg brw_acc_reg(unsigned width) { return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE, @@ -778,7 +794,11 @@ brw_flag_reg(int reg, int subreg) BRW_ARF_FLAG + reg, subreg); } - +/** + * Return the mask register present in Gen4-5, or the related register present + * in Gen7.5 and later hardware referred to as "channel enable" register in + * the documentation. 
+ */ static inline struct brw_reg brw_mask_reg(unsigned subnr) { diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 34f75fdd814..ee0add5d765 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -399,10 +399,10 @@ schedule_node::set_latency_gen7(bool is_haswell) class instruction_scheduler { public: - instruction_scheduler(backend_visitor *v, int grf_count, + instruction_scheduler(backend_shader *s, int grf_count, instruction_scheduler_mode mode) { - this->bv = v; + this->bs = s; this->mem_ctx = ralloc_context(NULL); this->grf_count = grf_count; this->instructions.make_empty(); @@ -455,7 +455,7 @@ public: int grf_count; int time; exec_list instructions; - backend_visitor *bv; + backend_shader *bs; instruction_scheduler_mode mode; @@ -606,7 +606,7 @@ vec4_instruction_scheduler::get_register_pressure_benefit(backend_instruction *b schedule_node::schedule_node(backend_instruction *inst, instruction_scheduler *sched) { - const struct brw_device_info *devinfo = sched->bv->devinfo; + const struct brw_device_info *devinfo = sched->bs->devinfo; this->inst = inst; this->child_array_size = 0; @@ -1384,7 +1384,7 @@ vec4_instruction_scheduler::issue_time(backend_instruction *inst) void instruction_scheduler::schedule_instructions(bblock_t *block) { - const struct brw_device_info *devinfo = bv->devinfo; + const struct brw_device_info *devinfo = bs->devinfo; backend_instruction *inst = block->end(); time = 0; @@ -1419,7 +1419,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block) if (debug) { fprintf(stderr, "clock %4d, scheduled: ", time); - bv->dump_instruction(chosen->inst); + bs->dump_instruction(chosen->inst); } /* Now that we've scheduled a new instruction, some of its @@ -1435,7 +1435,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block) if (debug) { fprintf(stderr, "\tchild %d, %d parents: ", i, child->parent_count); - bv->dump_instruction(child->inst); + bs->dump_instruction(child->inst); } child->cand_generation = cand_generation; @@ -1474,7 +1474,7 @@ instruction_scheduler::run(cfg_t *cfg) if (debug) { fprintf(stderr, "\nInstructions before scheduling (reg_alloc %d)\n", post_reg_alloc); - bv->dump_instructions(); + bs->dump_instructions(); } /* Populate the remaining GRF uses array to improve the pre-regalloc @@ -1504,7 +1504,7 @@ instruction_scheduler::run(cfg_t *cfg) if (debug) { fprintf(stderr, "\nInstructions after scheduling (reg_alloc %d)\n", post_reg_alloc); - bv->dump_instructions(); + bs->dump_instructions(); } } diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 014b43448ad..5d9892214a9 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -52,6 +52,12 @@ static void upload_sf_vp(struct brw_context *brw) sizeof(*sfv), 32, &brw->sf.vp_offset); memset(sfv, 0, sizeof(*sfv)); + /* Accessing the fields Width and Height of gl_framebuffer to produce the + * values to program the viewport and scissor is fine as long as the + * gl_framebuffer has atleast one attachment. 
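+ * (Editorial note, assumption about helpers not shown here: the
+ * _mesa_geometric_width()/_mesa_geometric_height() calls used elsewhere in
+ * this patch fall back to the framebuffer's default geometry when there are
+ * no attachments, which is why those paths need no such precondition while
+ * this one asserts _HasAttachments below.)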
+ */ + assert(ctx->DrawBuffer->_HasAttachments); + if (render_to_fbo) { y_scale = 1.0; y_bias = 0; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index ebfb49acf8d..06393c8ff2b 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -32,16 +32,106 @@ #include "glsl/glsl_parser_extras.h" #include "main/shaderapi.h" +static void +shader_debug_log_mesa(void *data, const char *fmt, ...) +{ + struct brw_context *brw = (struct brw_context *)data; + va_list args; + + va_start(args, fmt); + GLuint msg_id = 0; + _mesa_gl_vdebug(&brw->ctx, &msg_id, + MESA_DEBUG_SOURCE_SHADER_COMPILER, + MESA_DEBUG_TYPE_OTHER, + MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args); + va_end(args); +} + +static void +shader_perf_log_mesa(void *data, const char *fmt, ...) +{ + struct brw_context *brw = (struct brw_context *)data; + + va_list args; + va_start(args, fmt); + + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { + va_list args_copy; + va_copy(args_copy, args); + vfprintf(stderr, fmt, args_copy); + va_end(args_copy); + } + + if (brw->perf_debug) { + GLuint msg_id = 0; + _mesa_gl_vdebug(&brw->ctx, &msg_id, + MESA_DEBUG_SOURCE_SHADER_COMPILER, + MESA_DEBUG_TYPE_PERFORMANCE, + MESA_DEBUG_SEVERITY_MEDIUM, fmt, args); + } + va_end(args); +} + struct brw_compiler * brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) { struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler); compiler->devinfo = devinfo; + compiler->shader_debug_log = shader_debug_log_mesa; + compiler->shader_perf_log = shader_perf_log_mesa; brw_fs_alloc_reg_sets(compiler); brw_vec4_alloc_reg_set(compiler); + if (devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS)) + compiler->scalar_vs = true; + + nir_shader_compiler_options *nir_options = + rzalloc(compiler, nir_shader_compiler_options); + nir_options->native_integers = true; + /* In order to help allow for better CSE at the NIR level we tell NIR + * to split all ffma instructions during opt_algebraic and we then + * re-combine them as a later step. + */ + nir_options->lower_ffma = true; + nir_options->lower_sub = true; + + /* We want the GLSL compiler to emit code that uses condition codes */ + for (int i = 0; i < MESA_SHADER_STAGES; i++) { + compiler->glsl_compiler_options[i].MaxUnrollIterations = 32; + compiler->glsl_compiler_options[i].MaxIfDepth = + devinfo->gen < 6 ? 16 : UINT_MAX; + + compiler->glsl_compiler_options[i].EmitCondCodes = true; + compiler->glsl_compiler_options[i].EmitNoNoise = true; + compiler->glsl_compiler_options[i].EmitNoMainReturn = true; + compiler->glsl_compiler_options[i].EmitNoIndirectInput = true; + compiler->glsl_compiler_options[i].EmitNoIndirectOutput = + (i == MESA_SHADER_FRAGMENT); + compiler->glsl_compiler_options[i].EmitNoIndirectTemp = + (i == MESA_SHADER_FRAGMENT); + compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false; + compiler->glsl_compiler_options[i].LowerClipDistance = true; + } + + compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = true; + compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].OptimizeForAOS = true; + + if (compiler->scalar_vs) { + /* If we're using the scalar backend for vertex shaders, we need to + * configure these accordingly. 
+ */ + compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true; + compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true; + compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = false; + + compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions = nir_options; + } + + compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions = nir_options; + compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions = nir_options; + return compiler; } @@ -97,7 +187,7 @@ is_scalar_shader_stage(struct brw_context *brw, int stage) case MESA_SHADER_FRAGMENT: return true; case MESA_SHADER_VERTEX: - return brw->scalar_vs; + return brw->intelScreen->compiler->scalar_vs; default: return false; } @@ -632,6 +722,8 @@ brw_instruction_name(enum opcode op) return "gs_ff_sync_set_primitives"; case CS_OPCODE_CS_TERMINATE: return "cs_terminate"; + case SHADER_OPCODE_BARRIER: + return "barrier"; } unreachable("not reached"); @@ -755,19 +847,22 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) return false; } -backend_visitor::backend_visitor(struct brw_context *brw, - struct gl_shader_program *shader_prog, - struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - gl_shader_stage stage) - : brw(brw), - devinfo(brw->intelScreen->devinfo), - ctx(&brw->ctx), +backend_shader::backend_shader(const struct brw_compiler *compiler, + void *log_data, + void *mem_ctx, + struct gl_shader_program *shader_prog, + struct gl_program *prog, + struct brw_stage_prog_data *stage_prog_data, + gl_shader_stage stage) + : compiler(compiler), + log_data(log_data), + devinfo(compiler->devinfo), shader(shader_prog ? (struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL), shader_prog(shader_prog), prog(prog), stage_prog_data(stage_prog_data), + mem_ctx(mem_ctx), cfg(NULL), stage(stage) { @@ -950,7 +1045,6 @@ backend_instruction::can_do_saturate() const case BRW_OPCODE_LINE: case BRW_OPCODE_LRP: case BRW_OPCODE_MAC: - case BRW_OPCODE_MACH: case BRW_OPCODE_MAD: case BRW_OPCODE_MATH: case BRW_OPCODE_MOV: @@ -1060,6 +1154,7 @@ backend_instruction::has_side_effects() const case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_URB_WRITE_SIMD8: case FS_OPCODE_FB_WRITE: + case SHADER_OPCODE_BARRIER: return true; default: return false; @@ -1148,13 +1243,13 @@ backend_instruction::remove(bblock_t *block) } void -backend_visitor::dump_instructions() +backend_shader::dump_instructions() { dump_instructions(NULL); } void -backend_visitor::dump_instructions(const char *name) +backend_shader::dump_instructions(const char *name) { FILE *file = stderr; if (name && geteuid() != 0) { @@ -1183,7 +1278,7 @@ backend_visitor::dump_instructions(const char *name) } void -backend_visitor::calculate_cfg() +backend_shader::calculate_cfg() { if (this->cfg) return; @@ -1191,7 +1286,7 @@ backend_visitor::calculate_cfg() } void -backend_visitor::invalidate_cfg() +backend_shader::invalidate_cfg() { ralloc_free(this->cfg); this->cfg = NULL; @@ -1206,7 +1301,7 @@ backend_visitor::invalidate_cfg() * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES. 
*/ void -backend_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table_offset) +backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_offset) { int num_textures = _mesa_fls(prog->SamplersUsed); diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 59a0eff824e..b2c1a0b8d69 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -86,6 +86,12 @@ struct brw_compiler { */ int aligned_pairs_class; } fs_reg_sets[2]; + + void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); + void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); + + bool scalar_vs; + struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES]; }; enum PACKED register_file { @@ -211,20 +217,23 @@ enum instruction_scheduler_mode { SCHEDULE_POST, }; -class backend_visitor : public ir_visitor { +class backend_shader { protected: - backend_visitor(struct brw_context *brw, - struct gl_shader_program *shader_prog, - struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - gl_shader_stage stage); + backend_shader(const struct brw_compiler *compiler, + void *log_data, + void *mem_ctx, + struct gl_shader_program *shader_prog, + struct gl_program *prog, + struct brw_stage_prog_data *stage_prog_data, + gl_shader_stage stage); public: - struct brw_context * const brw; + const struct brw_compiler *compiler; + void *log_data; /* Passed to compiler->*_log functions */ + const struct brw_device_info * const devinfo; - struct gl_context * const ctx; struct brw_shader * const shader; struct gl_shader_program * const shader_prog; struct gl_program * const prog; diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 26fdae64ea4..987672f8815 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -229,11 +229,14 @@ void brw_destroy_caches( struct brw_context *brw ); #define BRW_BATCH_STRUCT(brw, s) \ intel_batchbuffer_data(brw, (s), sizeof(*(s)), RENDER_RING) -void *brw_state_batch(struct brw_context *brw, - enum aub_state_struct_type type, - int size, - int alignment, - uint32_t *out_offset); +void *__brw_state_batch(struct brw_context *brw, + enum aub_state_struct_type type, + int size, + int alignment, + int index, + uint32_t *out_offset); +#define brw_state_batch(brw, type, size, alignment, out_offset) \ + __brw_state_batch(brw, type, size, alignment, 0, out_offset) /* brw_wm_surface_state.c */ void gen4_init_vtable_surface_functions(struct brw_context *brw); @@ -246,6 +249,7 @@ void brw_configure_w_tiled(const struct intel_mipmap_tree *mt, unsigned *pitch, uint32_t *tiling, unsigned *format); +const char *brw_surface_format_name(unsigned format); uint32_t brw_format_for_mesa_format(mesa_format mesa_format); GLuint translate_tex_target(GLenum target); diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 45dca69823f..a405a80ef6e 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -38,7 +38,8 @@ static void brw_track_state_batch(struct brw_context *brw, enum aub_state_struct_type type, uint32_t offset, - int size) + int size, + int index) { struct intel_batchbuffer *batch = &brw->batch; @@ -53,6 +54,7 @@ brw_track_state_batch(struct brw_context *brw, brw->state_batch_list[brw->state_batch_count].offset = offset; 
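   /* (Editorial note, not part of the original patch: existing callers keep
    * the five-argument brw_state_batch() form, which the macro added to
    * brw_state.h above forwards to __brw_state_batch() with index 0; the
    * index is stored with each tracked struct, presumably so the batch/AUB
    * debug dumps -- e.g. the new per-surface "SURF%03d" output later in this
    * patch -- can tell apart multiple structs of the same type.)
    */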
brw->state_batch_list[brw->state_batch_count].size = size; brw->state_batch_list[brw->state_batch_count].type = type; + brw->state_batch_list[brw->state_batch_count].index = index; brw->state_batch_count++; } @@ -108,18 +110,20 @@ brw_annotate_aub(struct brw_context *brw) * margin (4096 bytes, even if the object is just a 20-byte surface * state), and more buffers to walk and count for aperture size checking. * - * However, due to the restrictions inposed by the aperture size + * However, due to the restrictions imposed by the aperture size * checking performance hacks, we can't have the batch point at a * separate indirect state buffer, because once the batch points at * it, no more relocations can be added to it. So, we sneak these * buffers in at the top of the batchbuffer. */ void * -brw_state_batch(struct brw_context *brw, - enum aub_state_struct_type type, - int size, - int alignment, - uint32_t *out_offset) +__brw_state_batch(struct brw_context *brw, + enum aub_state_struct_type type, + int size, + int alignment, + int index, + uint32_t *out_offset) + { struct intel_batchbuffer *batch = &brw->batch; uint32_t offset; @@ -140,7 +144,7 @@ brw_state_batch(struct brw_context *brw, batch->state_batch_offset = offset; if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_AUB))) - brw_track_state_batch(brw, type, offset, size); + brw_track_state_batch(brw, type, offset, size, index); *out_offset = offset; return batch->map + (offset>>2); diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 530f5a8b76e..b6f4d598e1d 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -1,5 +1,5 @@ /* - * Copyright © 2007 Intel Corporation + * Copyright © 2007-2015 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -31,6 +31,41 @@ #include "brw_context.h" #include "brw_defines.h" #include "brw_eu.h" +#include "brw_state.h" + +static const char *sampler_mip_filter[] = { + "NONE", + "NEAREST", + "RSVD", + "LINEAR" +}; + +static const char *sampler_mag_filter[] = { + "NEAREST", + "LINEAR", + "ANISOTROPIC", + "FLEXIBLE (GEN8+)", + "RSVD", "RSVD", + "MONO", + "RSVD" +}; + +static const char *sampler_addr_mode[] = { + "WRAP", + "MIRROR", + "CLAMP", + "CUBE", + "CLAMP_BORDER", + "MIRROR_ONCE", + "HALF_BORDER" +}; + +static const char *surface_tiling[] = { + "LINEAR", + "W-tiled", + "X-tiled", + "Y-tiled" +}; static void batch_out(struct brw_context *brw, const char *name, uint32_t offset, @@ -50,6 +85,25 @@ batch_out(struct brw_context *brw, const char *name, uint32_t offset, va_end(va); } +static void +batch_out64(struct brw_context *brw, const char *name, uint32_t offset, + int index, char *fmt, ...) +{ + uint32_t *tmp = brw->batch.bo->virtual + offset; + + /* Swap the dwords since we want to handle this as a 64b value, but the data + * is typically emitted as dwords. 
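+ * (Editorial note: concretely, tmp[index] is taken as the low dword and
+ * tmp[index + 1] as the high dword, so the shift-and-or below rebuilds the
+ * 64-bit value in the order the dwords were emitted into the batch.)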
+ */ + uint64_t data = ((uint64_t)tmp[index + 1]) << 32 | tmp[index]; + va_list va; + + fprintf(stderr, "0x%08x: 0x%016" PRIx64 ": %8s: ", + offset + index * 4, data, name); + va_start(va, fmt); + vfprintf(stderr, fmt, va); + va_end(va); +} + static const char * get_965_surfacetype(unsigned int surfacetype) { @@ -64,19 +118,6 @@ get_965_surfacetype(unsigned int surfacetype) } } -static const char * -get_965_surface_format(unsigned int surface_format) -{ - switch (surface_format) { - case 0x000: return "r32g32b32a32_float"; - case 0x0c1: return "b8g8r8a8_unorm"; - case 0x100: return "b5g6r5_unorm"; - case 0x102: return "b5g5r5a1_unorm"; - case 0x104: return "b4g4r4a4_unorm"; - default: return "unknown"; - } -} - static void dump_vs_state(struct brw_context *brw, uint32_t offset) { const char *name = "VS_STATE"; @@ -176,7 +217,7 @@ static void dump_surface_state(struct brw_context *brw, uint32_t offset) batch_out(brw, name, offset, 0, "%s %s\n", get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), - get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); + brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); batch_out(brw, name, offset, 1, "offset\n"); batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n", GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1, @@ -200,7 +241,7 @@ static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset) batch_out(brw, name, offset, 0, "%s %s %s\n", get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), - get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)), + brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)), (surf[0] & GEN7_SURFACE_IS_ARRAY) ? "array" : ""); batch_out(brw, name, offset, 1, "offset\n"); batch_out(brw, name, offset, 2, "%dx%d size, %d mips, %d slices\n", @@ -222,6 +263,87 @@ static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset) batch_out(brw, name, offset, 7, "\n"); } +static float q_to_float(uint32_t data, int integer_end, int integer_start, + int fractional_end, int fractional_start) +{ + /* Convert the number to floating point. */ + float n = GET_BITS(data, integer_start, fractional_end); + + /* Multiply by 2^-n */ + return n * exp2(-(fractional_end - fractional_start + 1)); +} + +static void +dump_gen8_surface_state(struct brw_context *brw, uint32_t offset, int index) +{ + uint32_t *surf = brw->batch.bo->virtual + offset; + int aux_mode = surf[6] & INTEL_MASK(2, 0); + const char *aux_str; + char *name; + + if (brw->gen >= 9 && (aux_mode == 1 || aux_mode == 5)) { + bool msrt = GET_BITS(surf[4], 5, 3) > 0; + bool compression = GET_FIELD(surf[7], GEN9_SURFACE_RT_COMPRESSION) == 1; + aux_str = ralloc_asprintf(NULL, "AUX_CCS_%c (%s, MULTISAMPLE_COUNT%c1)", + (aux_mode == 1) ? 'D' : 'E', + compression ? "Compressed RT" : "Uncompressed", + msrt ? '>' : '='); + } else { + static const char *surface_aux_mode[] = { "AUX_NONE", "AUX_MCS", + "AUX_APPEND", "AUX_HIZ", + "RSVD", "RSVD"}; + aux_str = ralloc_asprintf(NULL, "%s", surface_aux_mode[aux_mode]); + } + + name = ralloc_asprintf(NULL, "SURF%03d", index); + batch_out(brw, name, offset, 0, "%s %s %s VALIGN%d HALIGN%d %s\n", + get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), + brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)), + (surf[0] & GEN7_SURFACE_IS_ARRAY) ? 
"array" : "", + 1 << (GET_BITS(surf[0], 17, 16) + 1), /* VALIGN */ + 1 << (GET_BITS(surf[0], 15, 14) + 1), /* HALIGN */ + surface_tiling[GET_BITS(surf[0], 13, 12)]); + batch_out(brw, name, offset, 1, "MOCS: 0x%x Base MIP: %.1f (%u mips) Surface QPitch: %d\n", + GET_FIELD(surf[1], GEN8_SURFACE_MOCS), + q_to_float(surf[1], 23, 20, 19, 19), + surf[5] & INTEL_MASK(3, 0), + GET_FIELD(surf[1], GEN8_SURFACE_QPITCH) << 2); + batch_out(brw, name, offset, 2, "%dx%d [%s]\n", + GET_FIELD(surf[2], GEN7_SURFACE_WIDTH) + 1, + GET_FIELD(surf[2], GEN7_SURFACE_HEIGHT) + 1, + aux_str); + batch_out(brw, name, offset, 3, "%d slices (depth), pitch: %d\n", + GET_FIELD(surf[3], BRW_SURFACE_DEPTH) + 1, + (surf[3] & INTEL_MASK(17, 0)) + 1); + batch_out(brw, name, offset, 4, "min array element: %d, array extent %d, MULTISAMPLE_%d\n", + GET_FIELD(surf[4], GEN7_SURFACE_MIN_ARRAY_ELEMENT), + GET_FIELD(surf[4], GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT) + 1, + 1 << GET_BITS(surf[4], 5, 3)); + batch_out(brw, name, offset, 5, "x,y offset: %d,%d, min LOD: %d\n", + GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET), + GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET), + GET_FIELD(surf[5], GEN7_SURFACE_MIN_LOD)); + batch_out(brw, name, offset, 6, "AUX pitch: %d qpitch: %d\n", + GET_FIELD(surf[6], GEN8_SURFACE_AUX_QPITCH) << 2, + GET_FIELD(surf[6], GEN8_SURFACE_AUX_PITCH) << 2); + if (brw->gen >= 9) { + batch_out(brw, name, offset, 7, "Clear color: R(%x)G(%x)B(%x)A(%x)\n", + surf[12], surf[13], surf[14], surf[15]); + } else { + batch_out(brw, name, offset, 7, "Clear color: %c%c%c%c\n", + GET_BITS(surf[7], 31, 31) ? 'R' : '-', + GET_BITS(surf[7], 30, 30) ? 'G' : '-', + GET_BITS(surf[7], 29, 29) ? 'B' : '-', + GET_BITS(surf[7], 28, 28) ? 'A' : '-'); + } + + for (int i = 8; i < 12; i++) + batch_out(brw, name, offset, i, "0x%08x\n", surf[i]); + + ralloc_free((void *)aux_str); + ralloc_free(name); +} + static void dump_sdc(struct brw_context *brw, uint32_t offset) { @@ -229,7 +351,7 @@ dump_sdc(struct brw_context *brw, uint32_t offset) if (brw->gen >= 5 && brw->gen <= 6) { struct gen5_sampler_default_color *sdc = (brw->batch.bo->virtual + - offset); + offset); batch_out(brw, name, offset, 0, "unorm rgba\n"); batch_out(brw, name, offset, 1, "r %f\n", sdc->f[0]); batch_out(brw, name, offset, 2, "b %f\n", sdc->f[1]); @@ -271,6 +393,45 @@ static void dump_sampler_state(struct brw_context *brw, } } +static void gen7_dump_sampler_state(struct brw_context *brw, + uint32_t offset, uint32_t size) +{ + const uint32_t *samp = brw->batch.bo->virtual + offset; + char name[20]; + + for (int i = 0; i < size / 16; i++) { + sprintf(name, "SAMPLER_STATE %d", i); + batch_out(brw, name, offset, i, + "Disabled = %s, Base Mip: %u.%u, Mip/Mag/Min Filter: %s/%s/%s, LOD Bias: %d.%d\n", + GET_BITS(samp[0], 31, 31) ? 
"yes" : "no", + GET_BITS(samp[0], 26, 23), + GET_BITS(samp[0], 22, 22), + sampler_mip_filter[GET_FIELD(samp[0], BRW_SAMPLER_MIP_FILTER)], + sampler_mag_filter[GET_FIELD(samp[0], BRW_SAMPLER_MAG_FILTER)], + /* min filter defs are the same as mag */ + sampler_mag_filter[GET_FIELD(samp[0], BRW_SAMPLER_MIN_FILTER)], + GET_BITS(samp[0], 13, 10), + GET_BITS(samp[0], 9, 1) + ); + batch_out(brw, name, offset, i+1, "Min LOD: %u.%u, Max LOD: %u.%u\n", + GET_BITS(samp[1], 31, 28), + GET_BITS(samp[1], 27, 20), + GET_BITS(samp[1], 19, 16), + GET_BITS(samp[1], 15, 8) + ); + batch_out(brw, name, offset, i+2, "Border Color\n"); /* FINISHME: gen8+ */ + batch_out(brw, name, offset, i+3, "Max aniso: RATIO %d:1, TC[XYZ] Address Control: %s|%s|%s\n", + (GET_FIELD(samp[3], BRW_SAMPLER_MAX_ANISOTROPY) + 1) * 2, + sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCX_WRAP_MODE)], + sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCY_WRAP_MODE)], + sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCZ_WRAP_MODE)] + ); + + samp += 4; + offset += 4 * sizeof(uint32_t); + } +} + static void dump_sf_viewport_state(struct brw_context *brw, uint32_t offset) { @@ -320,10 +481,17 @@ static void dump_sf_clip_viewport_state(struct brw_context *brw, batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30); batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31); batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32); - batch_out(brw, name, offset, 6, "guardband xmin = %f\n", vp->guardband.xmin); - batch_out(brw, name, offset, 7, "guardband xmax = %f\n", vp->guardband.xmax); - batch_out(brw, name, offset, 8, "guardband ymin = %f\n", vp->guardband.ymin); - batch_out(brw, name, offset, 9, "guardband ymax = %f\n", vp->guardband.ymax); + batch_out(brw, name, offset, 8, "guardband xmin = %f\n", vp->guardband.xmin); + batch_out(brw, name, offset, 9, "guardband xmax = %f\n", vp->guardband.xmax); + batch_out(brw, name, offset, 9, "guardband ymin = %f\n", vp->guardband.ymin); + batch_out(brw, name, offset, 10, "guardband ymax = %f\n", vp->guardband.ymax); + if (brw->gen >= 8) { + float *cc_vp = brw->batch.bo->virtual + offset; + batch_out(brw, name, offset, 12, "Min extents: %.2fx%.2f\n", + cc_vp[12], cc_vp[14]); + batch_out(brw, name, offset, 14, "Max extents: %.2fx%.2f\n", + cc_vp[13], cc_vp[15]); + } } @@ -398,6 +566,92 @@ static void dump_blend_state(struct brw_context *brw, uint32_t offset) } static void +gen8_dump_blend_state(struct brw_context *brw, uint32_t offset, uint32_t size) +{ + const uint32_t *blend = brw->batch.bo->virtual + offset; + const char *logicop[] = + { + "LOGICOP_CLEAR (BLACK)", + "LOGICOP_NOR", + "LOGICOP_AND_INVERTED", + "LOGICOP_COPY_INVERTED", + "LOGICOP_AND_REVERSE", + "LOGICOP_INVERT", + "LOGICOP_XOR", + "LOGICOP_NAND", + "LOGICOP_AND", + "LOGICOP_EQUIV", + "LOGICOP_NOOP", + "LOGICOP_OR_INVERTED", + "LOGICOP_COPY", + "LOGICOP_OR_REVERSE", + "LOGICOP_OR", + "LOGICOP_SET (WHITE)" + }; + + const char *blend_function[] = + { "ADD", "SUBTRACT", "REVERSE_SUBTRACT", "MIN", "MAX};" }; + + const char *blend_factor[0x1b] = + { + "RSVD", + "ONE", + "SRC_COLOR", "SRC_ALPHA", + "DST_ALPHA", "DST_COLOR", + "SRC_ALPHA_SATURATE", + "CONST_COLOR", "CONST_ALPHA", + "SRC1_COLOR", "SRC1_ALPHA", + "RSVD", "RSVD", "RSVD", "RSVD", "RSVD", "RSVD", + "ZERO", + "INV_SRC_COLOR", "INV_SRC_ALPHA", + "INV_DST_ALPHA", "INV_DST_COLOR", + "RSVD", + "INV_CONST_COLOR", "INV_CONST_ALPHA", + "INV_SRC1_COLOR", "INV_SRC1_ALPHA" + }; + + batch_out(brw, "BLEND", offset, 0, "Alpha blend/test\n"); + + if (((size) % 2) != 0) 
+ fprintf(stderr, "Invalid blend state size %d\n", size); + + for (int i = 1; i < size / 4; i += 2) { + char name[sizeof("BLEND_ENTRYXXX")]; + sprintf(name, "BLEND_ENTRY%02d", (i - 1) / 2); + if (blend[i + 1] & GEN8_BLEND_LOGIC_OP_ENABLE) { + batch_out(brw, name, offset, i + 1, "%s\n", + logicop[GET_FIELD(blend[i + 1], + GEN8_BLEND_LOGIC_OP_FUNCTION)]); + } else if (blend[i] & GEN8_BLEND_COLOR_BUFFER_BLEND_ENABLE) { + batch_out64(brw, name, offset, i, + "\n\t\t\tColor Buffer Blend factor %s,%s,%s,%s (src,dst,src alpha, dst alpha)" + "\n\t\t\tfunction %s,%s (color, alpha), Disables: %c%c%c%c\n", + blend_factor[GET_FIELD(blend[i], + GEN8_BLEND_SRC_BLEND_FACTOR)], + blend_factor[GET_FIELD(blend[i], + GEN8_BLEND_DST_BLEND_FACTOR)], + blend_factor[GET_FIELD(blend[i], + GEN8_BLEND_SRC_ALPHA_BLEND_FACTOR)], + blend_factor[GET_FIELD(blend[i], + GEN8_BLEND_DST_ALPHA_BLEND_FACTOR)], + blend_function[GET_FIELD(blend[i], + GEN8_BLEND_COLOR_BLEND_FUNCTION)], + blend_function[GET_FIELD(blend[i], + GEN8_BLEND_ALPHA_BLEND_FUNCTION)], + blend[i] & GEN8_BLEND_WRITE_DISABLE_RED ? 'R' : '-', + blend[i] & GEN8_BLEND_WRITE_DISABLE_GREEN ? 'G' : '-', + blend[i] & GEN8_BLEND_WRITE_DISABLE_BLUE ? 'B' : '-', + blend[i] & GEN8_BLEND_WRITE_DISABLE_ALPHA ? 'A' : '-' + ); + } else if (!blend[i] && (blend[i + 1] == 0xb)) { + batch_out64(brw, name, offset, i, "NOP blend state\n"); + } else { + batch_out64(brw, name, offset, i, "????\n"); + } + } +} + +static void dump_scissor(struct brw_context *brw, uint32_t offset) { const char *name = "SCISSOR"; @@ -555,20 +809,29 @@ dump_state_batch(struct brw_context *brw) dump_cc_state_gen4(brw, offset); break; case AUB_TRACE_BLEND_STATE: - dump_blend_state(brw, offset); + if (brw->gen >= 8) + gen8_dump_blend_state(brw, offset, size); + else + dump_blend_state(brw, offset); break; case AUB_TRACE_BINDING_TABLE: dump_binding_table(brw, offset, size); break; case AUB_TRACE_SURFACE_STATE: - if (brw->gen < 7) { - dump_surface_state(brw, offset); - } else { + if (brw->gen >= 8) { + dump_gen8_surface_state(brw, offset, + brw->state_batch_list[i].index); + } else if (brw->gen >= 7) { dump_gen7_surface_state(brw, offset); - } + } else { + dump_surface_state(brw, offset); + } break; case AUB_TRACE_SAMPLER_STATE: - dump_sampler_state(brw, offset, size); + if (brw->gen >= 7) + gen7_dump_sampler_state(brw, offset, size); + else + dump_sampler_state(brw, offset, size); break; case AUB_TRACE_SAMPLER_DEFAULT_COLOR: dump_sdc(brw, offset); diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 84b0861aaad..08d1ac28885 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -41,6 +41,7 @@ #include "brw_gs.h" #include "brw_wm.h" #include "brw_cs.h" +#include "main/framebuffer.h" static const struct brw_tracked_state *gen4_atoms[] = { @@ -660,6 +661,7 @@ brw_upload_pipeline_state(struct brw_context *brw, int i; static int dirty_count = 0; struct brw_state_flags state = brw->state.pipelines[pipeline]; + unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer); brw_select_pipeline(brw, pipeline); @@ -696,8 +698,8 @@ brw_upload_pipeline_state(struct brw_context *brw, brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS; } - if (brw->num_samples != ctx->DrawBuffer->Visual.samples) { - brw->num_samples = ctx->DrawBuffer->Visual.samples; + if (brw->num_samples != fb_samples) { + brw->num_samples = fb_samples; brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES; } diff --git 
a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 016f87a4c2a..05016067bba 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -39,13 +39,14 @@ struct surface_format_info { int input_vb; int streamed_output_vb; int color_processing; + const char *name; }; /* This macro allows us to write the table almost as it appears in the PRM, * while restructuring it to turn it into the C code we want. */ #define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, sf) \ - [sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color }, + [BRW_SURFACEFORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, #sf}, #define Y 0 #define x 999 @@ -73,6 +74,7 @@ struct surface_format_info { * VB - Input Vertex Buffer * SO - Steamed Output Vertex Buffers (transform feedback) * color - Color Processing + * sf - Surface Format * * See page 88 of the Sandybridge PRM VOL4_Part1 PDF. * @@ -85,230 +87,236 @@ struct surface_format_info { */ const struct surface_format_info surface_formats[] = { /* smpl filt shad CK RT AB VB SO color */ - SF( Y, 50, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_FLOAT) - SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_UINT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32X32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_USCALED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64_PASSTHRU) - SF( Y, 50, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_FLOAT) - SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_SINT) - SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_UINT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_USCALED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32_SFIXED) - SF( Y, Y, x, x, Y, 45, Y, x, 60, BRW_SURFACEFORMAT_R16G16B16A16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_FLOAT) - SF( Y, 50, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32_FLOAT) - SF( Y, 70, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32_FLOAT_LD) - SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32_UINT) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS) - SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT) - SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32A32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64_FLOAT) - SF( Y, Y, x, x, x, x, x, x, x, 
BRW_SURFACEFORMAT_R16G16B16X16_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16X16_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A32X32_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32X32_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I32X32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_USCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_USCALED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64_PASSTHRU) - SF( Y, Y, x, Y, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_B8G8R8A8_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB) + SF( Y, 50, x, x, Y, Y, Y, Y, x, R32G32B32A32_FLOAT) + SF( Y, x, x, x, Y, x, Y, Y, x, R32G32B32A32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, R32G32B32A32_UINT) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, R64G64_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, R32G32B32X32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_USCALED) + SF( x, x, x, x, x, x, x, x, x, R32G32B32A32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, R64G64_PASSTHRU) + SF( Y, 50, x, x, x, x, Y, Y, x, R32G32B32_FLOAT) + SF( Y, x, x, x, x, x, Y, Y, x, R32G32B32_SINT) + SF( Y, x, x, x, x, x, Y, Y, x, R32G32B32_UINT) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R32G32B32_USCALED) + SF( x, x, x, x, x, x, x, x, x, R32G32B32_SFIXED) + SF( Y, Y, x, x, Y, 45, Y, x, 60, R16G16B16A16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, R16G16B16A16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, R16G16B16A16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, R16G16B16A16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, R16G16B16A16_FLOAT) + SF( Y, 50, x, x, Y, Y, Y, Y, x, R32G32_FLOAT) + SF( Y, 70, x, x, Y, Y, Y, Y, x, R32G32_FLOAT_LD) + SF( Y, x, x, x, Y, x, Y, Y, x, R32G32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, R32G32_UINT) + SF( Y, 50, Y, x, x, x, x, x, x, R32_FLOAT_X8X24_TYPELESS) + SF( Y, x, x, x, x, x, x, x, x, X32_TYPELESS_G8X24_UINT) + SF( Y, 50, x, x, x, x, x, x, x, L32A32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, R32G32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, R32G32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, R64_FLOAT) + SF( Y, Y, x, x, x, x, x, x, x, R16G16B16X16_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, R16G16B16X16_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, A32X32_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, L32X32_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, I32X32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, R16G16B16A16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R16G16B16A16_USCALED) + SF( x, x, x, x, x, x, Y, x, x, R32G32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R32G32_USCALED) + SF( x, x, x, x, x, x, x, x, x, R32G32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, R64_PASSTHRU) + SF( Y, Y, x, Y, Y, Y, Y, x, 60, B8G8R8A8_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, B8G8R8A8_UNORM_SRGB) /* smpl filt shad CK RT AB VB SO color */ - SF( Y, Y, x, x, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_R10G10B10A2_UNORM) - SF( Y, Y, x, x, x, x, x, x, 60, BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R10G10B10A2_UINT) - SF( Y, Y, x, x, x, Y, Y, x, x, 
BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM) - SF( Y, Y, x, x, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_R8G8B8A8_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB) - SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_UINT) - SF( Y, Y, x, x, Y, 45, Y, x, x, BRW_SURFACEFORMAT_R16G16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16G16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16G16_FLOAT) - SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_B10G10R10A2_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB) - SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R11G11B10_FLOAT) - SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32_UINT) - SF( Y, 50, Y, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS) - SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16A16_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A32_FLOAT) - SF( Y, Y, x, Y, x, x, x, x, 60, BRW_SURFACEFORMAT_B8G8R8X8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8X8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10X2_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16A16_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_SNORM) + SF( Y, Y, x, x, Y, Y, Y, x, 60, R10G10B10A2_UNORM) + SF( Y, Y, x, x, x, x, x, x, 60, R10G10B10A2_UNORM_SRGB) + SF( Y, x, x, x, Y, x, Y, x, x, R10G10B10A2_UINT) + SF( Y, Y, x, x, x, Y, Y, x, x, R10G10B10_SNORM_A2_UNORM) + SF( Y, Y, x, x, Y, Y, Y, x, 60, R8G8B8A8_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, 60, R8G8B8A8_UNORM_SRGB) + SF( Y, Y, x, x, Y, 60, Y, x, x, R8G8B8A8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, R8G8B8A8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, R8G8B8A8_UINT) + SF( Y, Y, x, x, Y, 45, Y, x, x, R16G16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, R16G16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, R16G16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, R16G16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, R16G16_FLOAT) + SF( Y, Y, x, x, Y, Y, x, x, 60, B10G10R10A2_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, 60, B10G10R10A2_UNORM_SRGB) + SF( Y, Y, x, x, Y, Y, Y, x, x, R11G11B10_FLOAT) + SF( Y, x, x, x, Y, x, Y, Y, x, R32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, R32_UINT) + SF( Y, 50, Y, x, Y, Y, Y, Y, x, R32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, R24_UNORM_X8_TYPELESS) + SF( Y, x, x, x, x, x, x, x, x, X24_TYPELESS_G8_UINT) + SF( Y, Y, x, x, x, x, x, x, x, L16A16_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, I24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, 
L24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, A24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, I32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, L32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, A32_FLOAT) + SF( Y, Y, x, Y, x, x, x, x, 60, B8G8R8X8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, B8G8R8X8_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, R8G8B8X8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP) + SF( Y, Y, x, x, x, x, x, x, x, B10G10R10X2_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, L16A16_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, R32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, R32_SNORM) /* smpl filt shad CK RT AB VB SO color */ - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R10G10B10X2_USCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_USCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_USCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_USCALED) - SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G6R5_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB) - SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5A1_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB) - SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B4G4R4A4_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB) - SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R8G8_UNORM) - SF( Y, Y, x, Y, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8G8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_UINT) - SF( Y, Y, Y, x, Y, 45, Y, x, 70, BRW_SURFACEFORMAT_R16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16_FLOAT) - SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE0) - SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE1) - SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I16_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A16_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I16_FLOAT) - SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16_FLOAT) - SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A16_FLOAT) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UNORM_SRGB) - SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM) - SF( x, x, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5X1_UNORM) - SF( x, x, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_USCALED) + SF( x, x, x, x, x, x, Y, x, x, R10G10B10X2_USCALED) + SF( x, x, x, x, x, x, Y, x, x, R8G8B8A8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R8G8B8A8_USCALED) + SF( x, x, x, x, x, x, Y, x, x, R16G16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R16G16_USCALED) + SF( x, x, x, x, x, x, Y, x, x, R32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R32_USCALED) + SF( Y, Y, x, Y, Y, Y, x, x, x, B5G6R5_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, 
B5G6R5_UNORM_SRGB) + SF( Y, Y, x, Y, Y, Y, x, x, x, B5G5R5A1_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, B5G5R5A1_UNORM_SRGB) + SF( Y, Y, x, Y, Y, Y, x, x, x, B4G4R4A4_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, B4G4R4A4_UNORM_SRGB) + SF( Y, Y, x, x, Y, Y, Y, x, x, R8G8_UNORM) + SF( Y, Y, x, Y, Y, 60, Y, x, x, R8G8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, R8G8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, R8G8_UINT) + SF( Y, Y, Y, x, Y, 45, Y, x, 70, R16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, R16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, R16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, R16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, R16_FLOAT) + SF(50, 50, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE0) + SF(50, 50, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE1) + SF( Y, Y, Y, x, x, x, x, x, x, I16_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, L16_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, A16_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, L8A8_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, I16_FLOAT) + SF( Y, Y, Y, x, x, x, x, x, x, L16_FLOAT) + SF( Y, Y, Y, x, x, x, x, x, x, A16_FLOAT) + SF(45, 45, x, x, x, x, x, x, x, L8A8_UNORM_SRGB) + SF( Y, Y, x, Y, x, x, x, x, x, R5G5_SNORM_B6_UNORM) + SF( x, x, x, x, Y, Y, x, x, x, B5G5R5X1_UNORM) + SF( x, x, x, x, Y, Y, x, x, x, B5G5R5X1_UNORM_SRGB) + SF( x, x, x, x, x, x, Y, x, x, R8G8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R8G8_USCALED) /* smpl filt shad CK RT AB VB SO color */ - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_USCALED) - SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE0) - SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE1) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A1B5G5R5_UNORM) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4B4G4R4_UNORM) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_SINT) - SF( Y, Y, x, 45, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R8_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8_UINT) - SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_A8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P4A4_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_USCALED) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE0) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UNORM_SRGB) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE1) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P4A4_UNORM_PALETTE1) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM_PALETTE1) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_Y8_SNORM) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_SINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_UINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_SINT) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_DXT1_RGB_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R1_UINT) - SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_NORMAL) - SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPUVY) - SF(45, 45, x, x, x, x, x, x, x, 
BRW_SURFACEFORMAT_P2_UNORM_PALETTE0) - SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P2_UNORM_PALETTE1) - SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC1_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC2_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC3_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC4_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC5_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC1_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC2_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC3_UNORM_SRGB) - SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_MONO8) - SF( Y, Y, x, x, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPUV) - SF( Y, Y, x, x, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPY) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_DXT1_RGB) + SF( x, x, x, x, x, x, Y, x, x, R16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R16_USCALED) + SF(50, 50, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0) + SF(50, 50, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM) + SF( x, x, x, x, x, x, x, x, x, A4B4G4R4_UNORM) + SF( x, x, x, x, x, x, x, x, x, L8A8_UINT) + SF( x, x, x, x, x, x, x, x, x, L8A8_SINT) + SF( Y, Y, x, 45, Y, Y, Y, x, x, R8_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, R8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, R8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, R8_UINT) + SF( Y, Y, x, Y, Y, Y, x, x, x, A8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, I8_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, L8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, P4A4_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, A4P4_UNORM) + SF( x, x, x, x, x, x, Y, x, x, R8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R8_USCALED) + SF(45, 45, x, x, x, x, x, x, x, P8_UNORM_PALETTE0) + SF(45, 45, x, x, x, x, x, x, x, L8_UNORM_SRGB) + SF(45, 45, x, x, x, x, x, x, x, P8_UNORM_PALETTE1) + SF(45, 45, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE1) + SF(45, 45, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, Y8_SNORM) + SF( x, x, x, x, x, x, x, x, x, L8_UINT) + SF( x, x, x, x, x, x, x, x, x, L8_SINT) + SF( x, x, x, x, x, x, x, x, x, I8_UINT) + SF( x, x, x, x, x, x, x, x, x, I8_SINT) + SF(45, 45, x, x, x, x, x, x, x, DXT1_RGB_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, R1_UINT) + SF( Y, Y, x, Y, Y, x, x, x, 60, YCRCB_NORMAL) + SF( Y, Y, x, Y, Y, x, x, x, 60, YCRCB_SWAPUVY) + SF(45, 45, x, x, x, x, x, x, x, P2_UNORM_PALETTE0) + SF(45, 45, x, x, x, x, x, x, x, P2_UNORM_PALETTE1) + SF( Y, Y, x, Y, x, x, x, x, x, BC1_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, BC2_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, BC3_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, BC4_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, BC5_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, BC1_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, BC2_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, BC3_UNORM_SRGB) + SF( Y, x, x, x, x, x, x, x, x, MONO8) + SF( Y, Y, x, x, Y, x, x, x, 60, YCRCB_SWAPUV) + SF( Y, Y, x, x, Y, x, x, x, 60, YCRCB_SWAPY) + SF( Y, Y, x, x, x, x, x, x, x, DXT1_RGB) /* smpl filt shad CK RT AB VB SO color */ - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_FXT1) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_UNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_SNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_USCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64B64A64_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64B64_FLOAT) 
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC4_SNORM) - SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC5_SNORM) - SF(50, 50, x, x, x, x, 60, x, x, BRW_SURFACEFORMAT_R16G16B16_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_UNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SNORM) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_USCALED) - SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_SF16) - SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM) - SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM_SRGB) - SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_UF16) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_PLANAR_420_8) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UNORM_SRGB) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC1_RGB8) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_R11) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_RG11) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_R11) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_RG11) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_UINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_SINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SNORM) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_USCALED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SSCALED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SNORM) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_USCALED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SSCALED) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_UINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64_PASSTHRU) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8_PTA) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8_PTA) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_RGBA8) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_SRGB8_A8) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UINT) - SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_SINT) + SF( Y, Y, x, x, x, x, x, x, x, FXT1) + SF( x, x, x, x, x, x, Y, x, x, R8G8B8_UNORM) + SF( x, x, x, x, x, x, Y, x, x, R8G8B8_SNORM) + SF( x, x, x, x, x, x, Y, x, x, R8G8B8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R8G8B8_USCALED) + SF( x, x, x, x, x, x, Y, x, x, R64G64B64A64_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, R64G64B64_FLOAT) + SF( Y, Y, x, x, x, x, x, x, x, BC4_SNORM) + SF( Y, Y, x, x, x, x, x, x, x, BC5_SNORM) + SF(50, 50, x, x, x, x, 60, x, x, R16G16B16_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, R16G16B16_UNORM) + SF( x, x, x, x, x, x, Y, x, x, R16G16B16_SNORM) + SF( x, x, x, x, x, x, Y, x, x, R16G16B16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, R16G16B16_USCALED) + SF(70, 70, x, x, x, x, x, x, x, BC6H_SF16) + SF(70, 70, x, x, x, x, x, x, x, BC7_UNORM) + SF(70, 70, x, x, x, x, x, x, x, BC7_UNORM_SRGB) + SF(70, 70, x, x, x, x, x, x, x, BC6H_UF16) + SF( x, x, 
x, x, x, x, x, x, x, PLANAR_420_8) + SF( x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB) + SF( x, x, x, x, x, x, x, x, x, ETC1_RGB8) + SF( x, x, x, x, x, x, x, x, x, ETC2_RGB8) + SF( x, x, x, x, x, x, x, x, x, EAC_R11) + SF( x, x, x, x, x, x, x, x, x, EAC_RG11) + SF( x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11) + SF( x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11) + SF( x, x, x, x, x, x, x, x, x, ETC2_SRGB8) + SF( x, x, x, x, x, x, x, x, x, R16G16B16_UINT) + SF( x, x, x, x, x, x, x, x, x, R16G16B16_SINT) + SF( x, x, x, x, x, x, x, x, x, R32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SNORM) + SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_USCALED) + SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SSCALED) + SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SINT) + SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SNORM) + SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_USCALED) + SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SSCALED) + SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_UINT) + SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SINT) + SF( x, x, x, x, x, x, x, x, x, R64G64B64A64_PASSTHRU) + SF( x, x, x, x, x, x, x, x, x, R64G64B64_PASSTHRU) + SF( x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA) + SF( x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA) + SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8) + SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8) + SF( x, x, x, x, x, x, x, x, x, R8G8B8_UINT) + SF( x, x, x, x, x, x, x, x, x, R8G8B8_SINT) }; #undef x #undef Y +const char * +brw_surface_format_name(unsigned format) +{ + return surface_formats[format].name; +} + uint32_t brw_format_for_mesa_format(mesa_format mesa_format) { diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 72b02a2cf0a..998d8c42770 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -40,9 +40,88 @@ #define FILE_DEBUG_FLAG DEBUG_MIPTREE static unsigned int +tr_mode_horizontal_texture_alignment(const struct brw_context *brw, + const struct intel_mipmap_tree *mt) +{ + const unsigned *align_yf, *align_ys; + const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8; + unsigned ret_align, divisor; + + /* Horizontal alignment tables for TRMODE_{YF,YS}. Value in below + * tables specifies the horizontal alignment requirement in elements + * for the surface. An element is defined as a pixel in uncompressed + * surface formats, and as a compression block in compressed surface + * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an + * element is a sample. + */ + const unsigned align_1d_yf[] = {4096, 2048, 1024, 512, 256}; + const unsigned align_1d_ys[] = {65536, 32768, 16384, 8192, 4096}; + const unsigned align_2d_yf[] = {64, 64, 32, 32, 16}; + const unsigned align_2d_ys[] = {256, 256, 128, 128, 64}; + const unsigned align_3d_yf[] = {16, 8, 8, 8, 4}; + const unsigned align_3d_ys[] = {64, 32, 32, 32, 16}; + int i = 0; + + /* Alignment computations below assume bpp >= 8 and a power of 2. 
*/ + assert (bpp >= 8 && bpp <= 128 && is_power_of_two(bpp)); + + switch(mt->target) { + case GL_TEXTURE_1D: + case GL_TEXTURE_1D_ARRAY: + align_yf = align_1d_yf; + align_ys = align_1d_ys; + break; + case GL_TEXTURE_2D: + case GL_TEXTURE_RECTANGLE: + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + align_yf = align_2d_yf; + align_ys = align_2d_ys; + break; + case GL_TEXTURE_3D: + align_yf = align_3d_yf; + align_ys = align_3d_ys; + break; + default: + unreachable("not reached"); + } + + /* Compute array index. */ + i = ffs(bpp/8) - 1; + + ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ? + align_yf[i] : align_ys[i]; + + assert(is_power_of_two(mt->num_samples)); + + switch (mt->num_samples) { + case 2: + case 4: + divisor = 2; + break; + case 8: + case 16: + divisor = 4; + break; + default: + divisor = 1; + break; + } + return ret_align / divisor; +} + + +static unsigned int intel_horizontal_texture_alignment_unit(struct brw_context *brw, - struct intel_mipmap_tree *mt) + struct intel_mipmap_tree *mt, + uint32_t layout_flags) { + if (layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) + return 16; + /** * From the "Alignment Unit Size" section of various specs, namely: * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4 @@ -88,18 +167,85 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw, if (mt->format == MESA_FORMAT_S_UINT8) return 8; + if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) { + uint32_t align = tr_mode_horizontal_texture_alignment(brw, mt); + /* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32. */ + return align < 32 ? 32 : align; + } + if (brw->gen >= 7 && mt->format == MESA_FORMAT_Z_UNORM16) return 8; - if (brw->gen == 8 && mt->mcs_mt && mt->num_samples <= 1) - return 16; - return 4; } static unsigned int +tr_mode_vertical_texture_alignment(const struct brw_context *brw, + const struct intel_mipmap_tree *mt) +{ + const unsigned *align_yf, *align_ys; + const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8; + unsigned ret_align, divisor; + + /* Vertical alignment tables for TRMODE_YF and TRMODE_YS. */ + const unsigned align_2d_yf[] = {64, 32, 32, 16, 16}; + const unsigned align_2d_ys[] = {256, 128, 128, 64, 64}; + const unsigned align_3d_yf[] = {16, 16, 16, 8, 8}; + const unsigned align_3d_ys[] = {32, 32, 32, 16, 16}; + int i = 0; + + assert(brw->gen >= 9 && + mt->target != GL_TEXTURE_1D && + mt->target != GL_TEXTURE_1D_ARRAY); + + /* Alignment computations below assume bpp >= 8 and a power of 2. */ + assert (bpp >= 8 && bpp <= 128 && is_power_of_two(bpp)) ; + + switch(mt->target) { + case GL_TEXTURE_2D: + case GL_TEXTURE_RECTANGLE: + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + align_yf = align_2d_yf; + align_ys = align_2d_ys; + break; + case GL_TEXTURE_3D: + align_yf = align_3d_yf; + align_ys = align_3d_ys; + break; + default: + unreachable("not reached"); + } + + /* Compute array index. */ + i = ffs(bpp / 8) - 1; + + ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ? 
+ align_yf[i] : align_ys[i]; + + assert(is_power_of_two(mt->num_samples)); + + switch (mt->num_samples) { + case 4: + case 8: + divisor = 2; + break; + case 16: + divisor = 4; + break; + default: + divisor = 1; + break; + } + return ret_align / divisor; +} + +static unsigned int intel_vertical_texture_alignment_unit(struct brw_context *brw, - mesa_format format, bool multisampled) + const struct intel_mipmap_tree *mt) { /** * From the "Alignment Unit Size" section of various specs, namely: @@ -124,23 +270,29 @@ intel_vertical_texture_alignment_unit(struct brw_context *brw, * Where "*" means either VALIGN_2 or VALIGN_4 depending on the setting of * the SURFACE_STATE "Surface Vertical Alignment" field. */ - if (_mesa_is_format_compressed(format)) + if (_mesa_is_format_compressed(mt->format)) /* See comment above for the horizontal alignment */ return brw->gen >= 9 ? 16 : 4; - if (format == MESA_FORMAT_S_UINT8) + if (mt->format == MESA_FORMAT_S_UINT8) return brw->gen >= 7 ? 8 : 4; + if (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) { + uint32_t align = tr_mode_vertical_texture_alignment(brw, mt); + /* XY_FAST_COPY_BLT doesn't support vertical alignment < 64 */ + return align < 64 ? 64 : align; + } + /* Broadwell only supports VALIGN of 4, 8, and 16. The BSpec says 4 * should always be used, except for stencil buffers, which should be 8. */ if (brw->gen >= 8) return 4; - if (multisampled) + if (mt->num_samples > 1) return 4; - GLenum base_format = _mesa_get_format_base_format(format); + GLenum base_format = _mesa_get_format_base_format(mt->format); if (brw->gen >= 6 && (base_format == GL_DEPTH_COMPONENT || @@ -161,7 +313,7 @@ intel_vertical_texture_alignment_unit(struct brw_context *brw, * * VALIGN_4 is not supported for surface format R32G32B32_FLOAT. */ - if (base_format == GL_YCBCR_MESA || format == MESA_FORMAT_RGB_FLOAT32) + if (base_format == GL_YCBCR_MESA || mt->format == MESA_FORMAT_RGB_FLOAT32) return 2; return 4; @@ -348,9 +500,9 @@ align_cube(struct intel_mipmap_tree *mt) mt->total_height += 2; } -static bool -use_linear_1d_layout(struct brw_context *brw, - struct intel_mipmap_tree *mt) +bool +gen9_use_linear_1d_layout(const struct brw_context *brw, + const struct intel_mipmap_tree *mt) { /* On Gen9+ the mipmap levels of a 1D surface are all laid out in a * horizontal line. This isn't done for depth/stencil buffers however @@ -375,7 +527,7 @@ brw_miptree_layout_texture_array(struct brw_context *brw, struct intel_mipmap_tree *mt) { unsigned height = mt->physical_height0; - bool layout_1d = use_linear_1d_layout(brw, mt); + bool layout_1d = gen9_use_linear_1d_layout(brw, mt); int physical_qpitch; if (layout_1d) @@ -458,46 +610,111 @@ brw_miptree_layout_texture_3d(struct brw_context *brw, align_cube(mt); } -void -brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt) +/** + * \brief Helper function for intel_miptree_create(). + */ +static uint32_t +brw_miptree_choose_tiling(struct brw_context *brw, + enum intel_miptree_tiling_mode requested, + const struct intel_mipmap_tree *mt) { - bool multisampled = mt->num_samples > 1; - bool gen6_hiz_or_stencil = false; + if (mt->format == MESA_FORMAT_S_UINT8) { + /* The stencil buffer is W tiled. However, we request from the kernel a + * non-tiled buffer because the GTT is incapable of W fencing. 
+ */ + return I915_TILING_NONE; + } - if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) { - const GLenum base_format = _mesa_get_format_base_format(mt->format); - gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format); + /* Some usages may want only one type of tiling, like depth miptrees (Y + * tiled), or temporary BOs for uploading data once (linear). + */ + switch (requested) { + case INTEL_MIPTREE_TILING_ANY: + break; + case INTEL_MIPTREE_TILING_Y: + return I915_TILING_Y; + case INTEL_MIPTREE_TILING_NONE: + return I915_TILING_NONE; } - if (gen6_hiz_or_stencil) { - /* On gen6, we use ALL_SLICES_AT_EACH_LOD for stencil/hiz because the - * hardware doesn't support multiple mip levels on stencil/hiz. + if (mt->num_samples > 1) { + /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled + * Surface"): * - * PRM Vol 2, Part 1, 7.5.3 Hierarchical Depth Buffer: - * "The hierarchical depth buffer does not support the LOD field" + * [DevSNB+]: For multi-sample render targets, this field must be + * 1. MSRTs can only be tiled. * - * PRM Vol 2, Part 1, 7.5.4.1 Separate Stencil Buffer: - * "The stencil depth buffer does not support the LOD field" + * Our usual reason for preferring X tiling (fast blits using the + * blitting engine) doesn't apply to MSAA, since we'll generally be + * downsampling or upsampling when blitting between the MSAA buffer + * and another buffer, and the blitting engine doesn't support that. + * So use Y tiling, since it makes better use of the cache. */ - if (mt->format == MESA_FORMAT_S_UINT8) { - /* Stencil uses W tiling, so we force W tiling alignment for the - * ALL_SLICES_AT_EACH_LOD miptree layout. - */ - mt->align_w = 64; - mt->align_h = 64; - } else { - /* Depth uses Y tiling, so we force need Y tiling alignment for the - * ALL_SLICES_AT_EACH_LOD miptree layout. - */ - mt->align_w = 128 / mt->cpp; - mt->align_h = 32; - } - } else { - mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt); - mt->align_h = - intel_vertical_texture_alignment_unit(brw, mt->format, multisampled); + return I915_TILING_Y; + } + + GLenum base_format = _mesa_get_format_base_format(mt->format); + if (base_format == GL_DEPTH_COMPONENT || + base_format == GL_DEPTH_STENCIL_EXT) + return I915_TILING_Y; + + /* 1D textures (and 1D array textures) don't get any benefit from tiling, + * in fact it leads to a less efficient use of memory space and bandwidth + * due to tile alignment. + */ + if (mt->logical_height0 == 1) + return I915_TILING_NONE; + + int minimum_pitch = mt->total_width * mt->cpp; + + /* If the width is much smaller than a tile, don't bother tiling. */ + if (minimum_pitch < 64) + return I915_TILING_NONE; + + if (ALIGN(minimum_pitch, 512) >= 32768 || + mt->total_width >= 32768 || mt->total_height >= 32768) { + perf_debug("%dx%d miptree too large to blit, falling back to untiled", + mt->total_width, mt->total_height); + return I915_TILING_NONE; + } + + /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */ + if (brw->gen < 6) + return I915_TILING_X; + + /* From the Sandybridge PRM, Volume 1, Part 2, page 32: + * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX + * or Linear." + * 128 bits per pixel translates to 16 bytes per pixel. This is necessary + * all the way back to 965, but is permitted on Gen7+. 
+ */ + if (brw->gen < 7 && mt->cpp >= 16) + return I915_TILING_X; + + /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most + * messages), on p64, under the heading "Surface Vertical Alignment": + * + * This field must be set to VALIGN_4 for all tiled Y Render Target + * surfaces. + * + * So if the surface is renderable and uses a vertical alignment of 2, + * force it to be X tiled. This is somewhat conservative (it's possible + * that the client won't ever render to this surface), but it's difficult + * to know that ahead of time. And besides, since we use a vertical + * alignment of 4 as often as we can, this shouldn't happen very often. + */ + if (brw->gen == 7 && mt->align_h == 2 && + brw->format_supported_as_render_target[mt->format]) { + return I915_TILING_X; } + return I915_TILING_Y | I915_TILING_X; +} + +static void +intel_miptree_set_total_width_height(struct brw_context *brw, + struct intel_mipmap_tree *mt) +{ switch (mt->target) { case GL_TEXTURE_CUBE_MAP: if (brw->gen == 4) { @@ -532,7 +749,7 @@ brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt) break; case INTEL_MSAA_LAYOUT_NONE: case INTEL_MSAA_LAYOUT_IMS: - if (use_linear_1d_layout(brw, mt)) + if (gen9_use_linear_1d_layout(brw, mt)) gen9_miptree_layout_1d(mt); else brw_miptree_layout_2d(mt); @@ -540,8 +757,62 @@ brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt) } break; } + DBG("%s: %dx%dx%d\n", __func__, mt->total_width, mt->total_height, mt->cpp); +} + +void +brw_miptree_layout(struct brw_context *brw, + struct intel_mipmap_tree *mt, + enum intel_miptree_tiling_mode requested, + uint32_t layout_flags) +{ + bool gen6_hiz_or_stencil = false; + + mt->tr_mode = INTEL_MIPTREE_TRMODE_NONE; + + if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) { + const GLenum base_format = _mesa_get_format_base_format(mt->format); + gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format); + } + + if (gen6_hiz_or_stencil) { + /* On gen6, we use ALL_SLICES_AT_EACH_LOD for stencil/hiz because the + * hardware doesn't support multiple mip levels on stencil/hiz. + * + * PRM Vol 2, Part 1, 7.5.3 Hierarchical Depth Buffer: + * "The hierarchical depth buffer does not support the LOD field" + * + * PRM Vol 2, Part 1, 7.5.4.1 Separate Stencil Buffer: + * "The stencil depth buffer does not support the LOD field" + */ + if (mt->format == MESA_FORMAT_S_UINT8) { + /* Stencil uses W tiling, so we force W tiling alignment for the + * ALL_SLICES_AT_EACH_LOD miptree layout. + */ + mt->align_w = 64; + mt->align_h = 64; + assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0); + } else { + /* Depth uses Y tiling, so we force need Y tiling alignment for the + * ALL_SLICES_AT_EACH_LOD miptree layout. 
+ */ + mt->align_w = 128 / mt->cpp; + mt->align_h = 32; + } + } else { + mt->align_w = + intel_horizontal_texture_alignment_unit(brw, mt, layout_flags); + mt->align_h = intel_vertical_texture_alignment_unit(brw, mt); + } + + intel_miptree_set_total_width_height(brw, mt); + + if (!mt->total_width || !mt->total_height) { + intel_miptree_release(&mt); + return; + } /* On Gen9+ the alignment values are expressed in multiples of the block * size @@ -552,5 +823,8 @@ brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt) mt->align_w /= i; mt->align_h /= j; } + + if ((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0) + mt->tiling = brw_miptree_choose_tiling(brw, requested, mt); } diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h index b548d234538..04e4e944118 100644 --- a/src/mesa/drivers/dri/i965/brw_util.h +++ b/src/mesa/drivers/dri/i965/brw_util.h @@ -35,9 +35,47 @@ #include "main/mtypes.h" #include "main/imports.h" +#include "brw_context.h" extern GLuint brw_translate_blend_factor( GLenum factor ); extern GLuint brw_translate_blend_equation( GLenum mode ); extern GLenum brw_fix_xRGB_alpha(GLenum function); +static inline uint32_t +brw_get_line_width(struct brw_context *brw) +{ + /* From the OpenGL 4.4 spec: + * + * "The actual width of non-antialiased lines is determined by rounding + * the supplied width to the nearest integer, then clamping it to the + * implementation-dependent maximum non-antialiased line width." + */ + float line_width = + CLAMP(!brw->ctx.Multisample._Enabled && !brw->ctx.Line.SmoothFlag + ? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width, + 0.0, brw->ctx.Const.MaxLineWidth); + uint32_t line_width_u3_7 = U_FIXED(line_width, 7); + + /* Line width of 0 is not allowed when MSAA enabled */ + if (brw->ctx.Multisample._Enabled) { + if (line_width_u3_7 == 0) + line_width_u3_7 = 1; + } else if (brw->ctx.Line.SmoothFlag && line_width < 1.5) { + /* For 1 pixel line thickness or less, the general + * anti-aliasing algorithm gives up, and a garbage line is + * generated. Setting a Line Width of 0.0 specifies the + * rasterization of the "thinnest" (one-pixel-wide), + * non-antialiased lines. + * + * Lines rendered with zero Line Width are rasterized using + * Grid Intersection Quantization rules as specified by + * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line + * Rasterization. 
+ */ + line_width_u3_7 = 0; + } + + return line_width_u3_7; +} + #endif diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 2841d983ad5..a5c686ceaaf 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -35,6 +35,7 @@ extern "C" { #include "program/prog_print.h" #include "program/prog_parameter.h" } +#include "main/context.h" #define MAX_INSTRUCTION (1 << 30) @@ -1676,20 +1677,16 @@ vec4_visitor::emit_shader_time_end() */ emit(ADD(diff, src_reg(diff), src_reg(-2u))); - emit_shader_time_write(st_base, src_reg(diff)); - emit_shader_time_write(st_written, src_reg(1u)); + emit_shader_time_write(0, src_reg(diff)); + emit_shader_time_write(1, src_reg(1u)); emit(BRW_OPCODE_ELSE); - emit_shader_time_write(st_reset, src_reg(1u)); + emit_shader_time_write(2, src_reg(1u)); emit(BRW_OPCODE_ENDIF); } void -vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type, - src_reg value) +vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value) { - int shader_time_index = - brw_get_shader_time_index(brw, shader_prog, prog, type); - dst_reg dst = dst_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 2)); @@ -1698,7 +1695,8 @@ vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type, time.reg_offset++; offset.type = BRW_REGISTER_TYPE_UD; - emit(MOV(offset, src_reg(shader_time_index * SHADER_TIME_STRIDE))); + int index = shader_time_index * 3 + shader_time_subindex; + emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE))); time.type = BRW_REGISTER_TYPE_UD; emit(MOV(time, src_reg(value))); @@ -1709,11 +1707,11 @@ vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type, } bool -vec4_visitor::run() +vec4_visitor::run(gl_clip_plane *clip_planes) { sanity_param_count = prog->Parameters->NumParameters; - if (INTEL_DEBUG & DEBUG_SHADER_TIME) + if (shader_time_index >= 0) emit_shader_time_begin(); assign_binding_table_offsets(); @@ -1731,7 +1729,7 @@ vec4_visitor::run() base_ir = NULL; if (key->userclip_active && !prog->UsesClipDistanceOut) - setup_uniform_clipplane_values(); + setup_uniform_clipplane_values(clip_planes); emit_thread_end(); @@ -1768,7 +1766,7 @@ vec4_visitor::run() snprintf(filename, 64, "%s-%04d-%02d-%02d-" #pass, \ stage_abbrev, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \ \ - backend_visitor::dump_instructions(filename); \ + backend_shader::dump_instructions(filename); \ } \ \ progress = progress || this_progress; \ @@ -1781,7 +1779,7 @@ vec4_visitor::run() snprintf(filename, 64, "%s-%04d-00-start", stage_abbrev, shader_prog ? 
shader_prog->Name : 0); - backend_visitor::dump_instructions(filename); + backend_shader::dump_instructions(filename); } bool progress; @@ -1868,8 +1866,6 @@ brw_vs_emit(struct brw_context *brw, bool start_busy = false; double start_time = 0; const unsigned *assembly = NULL; - bool use_nir = - brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions != NULL; if (unlikely(brw->perf_debug)) { start_busy = (brw->batch.last_bo && @@ -1881,22 +1877,33 @@ brw_vs_emit(struct brw_context *brw, if (prog) shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; + int st_index = -1; + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + st_index = brw_get_shader_time_index(brw, prog, &c->vp->program.Base, + ST_VS); + if (unlikely(INTEL_DEBUG & DEBUG_VS)) brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base); - if (use_nir && !c->vp->program.Base.nir) { - /* Normally we generate NIR in LinkShader() or ProgramStringNotify(), but - * Mesa's fixed-function vertex program handling doesn't notify the driver - * at all. Just do it here, at the last minute, even though it's lame. - */ - assert(c->vp->program.Base.Id == 0 && prog == NULL); - c->vp->program.Base.nir = - brw_create_nir(brw, NULL, &c->vp->program.Base, MESA_SHADER_VERTEX); - } + if (brw->intelScreen->compiler->scalar_vs) { + if (!c->vp->program.Base.nir) { + /* Normally we generate NIR in LinkShader() or + * ProgramStringNotify(), but Mesa's fixed-function vertex program + * handling doesn't notify the driver at all. Just do it here, at + * the last minute, even though it's lame. + */ + assert(c->vp->program.Base.Id == 0 && prog == NULL); + c->vp->program.Base.nir = + brw_create_nir(brw, NULL, &c->vp->program.Base, MESA_SHADER_VERTEX); + } - if (brw->scalar_vs && (prog || use_nir)) { - fs_visitor v(brw, mem_ctx, &c->key, prog_data, prog, &c->vp->program, 8); - if (!v.run_vs()) { + prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; + + fs_visitor v(brw->intelScreen->compiler, brw, + mem_ctx, MESA_SHADER_VERTEX, &c->key, + &prog_data->base.base, prog, &c->vp->program.Base, + 8, st_index); + if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) { if (prog) { prog->LinkStatus = false; ralloc_strcat(&prog->InfoLog, v.fail_msg); @@ -1908,7 +1915,8 @@ brw_vs_emit(struct brw_context *brw, return NULL; } - fs_generator g(brw, mem_ctx, (void *) &c->key, &prog_data->base.base, + fs_generator g(brw->intelScreen->compiler, brw, + mem_ctx, (void *) &c->key, &prog_data->base.base, &c->vp->program.Base, v.promoted_constants, v.runtime_check_aads_emit, "VS"); if (INTEL_DEBUG & DEBUG_VS) { @@ -1926,13 +1934,16 @@ brw_vs_emit(struct brw_context *brw, g.generate_code(v.cfg, 8); assembly = g.get_assembly(final_assembly_size); - prog_data->base.simd8 = true; c->base.last_scratch = v.last_scratch; } if (!assembly) { - vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx); - if (!v.run()) { + prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; + + vec4_vs_visitor v(brw->intelScreen->compiler, + c, prog_data, prog, mem_ctx, st_index, + !_mesa_is_gles3(&brw->ctx)); + if (!v.run(brw_select_clip_planes(&brw->ctx))) { if (prog) { prog->LinkStatus = false; ralloc_strcat(&prog->InfoLog, v.fail_msg); @@ -1944,7 +1955,8 @@ brw_vs_emit(struct brw_context *brw, return NULL; } - vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base, + vec4_generator g(brw->intelScreen->compiler, brw, + prog, &c->vp->program.Base, &prog_data->base, mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS"); assembly = g.generate_assembly(v.cfg, final_assembly_size); } 
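Review note on the shader-time change in the brw_vec4.cpp hunk above: emit_shader_time_write() now takes a small subindex (0 = accumulated time, 1 = "written" flag, 2 = "reset" flag, matching the three calls in emit_shader_time_end()) together with the visitor's single shader_time_index, instead of three separate shader_time_shader_type enum values. The buffer offset it emits is (shader_time_index * 3 + shader_time_subindex) * SHADER_TIME_STRIDE. Below is a minimal standalone sketch of just that offset computation — the helper name is hypothetical and SHADER_TIME_STRIDE is assumed to be 64 (its value in brw_context.h at the time); it is not part of the patch itself.

#include <stdio.h>

/* Assumption: SHADER_TIME_STRIDE is the per-slot stride of the shader-time
 * buffer (64 bytes in brw_context.h when this patch landed). */
#define SHADER_TIME_STRIDE 64

/* Hypothetical helper mirroring the offset computation emitted by
 * vec4_visitor::emit_shader_time_write(): each shader owns three
 * consecutive slots -- 0 = accumulated cycles, 1 = "written" flag,
 * 2 = "reset" flag -- selected by shader_time_index * 3 + subindex. */
static int
shader_time_offset(int shader_time_index, int subindex)
{
   return (shader_time_index * 3 + subindex) * SHADER_TIME_STRIDE;
}

int
main(void)
{
   /* Shader #2's "reset" slot: (2 * 3 + 2) * 64 = 512 bytes into the buffer. */
   printf("%d\n", shader_time_offset(2, 2));
   return 0;
}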
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 628c6313cc9..2ac16932189 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -73,10 +73,10 @@ class vec4_live_variables; * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and * fixed-function) into VS IR. */ -class vec4_visitor : public backend_visitor +class vec4_visitor : public backend_shader, public ir_visitor { public: - vec4_visitor(struct brw_context *brw, + vec4_visitor(const struct brw_compiler *compiler, struct brw_vec4_compile *c, struct gl_program *prog, const struct brw_vue_prog_key *key, @@ -85,9 +85,7 @@ public: gl_shader_stage stage, void *mem_ctx, bool no_spills, - shader_time_shader_type st_base, - shader_time_shader_type st_written, - shader_time_shader_type st_reset); + int shader_time_index); ~vec4_visitor(); dst_reg dst_null_f() @@ -160,6 +158,7 @@ public: virtual void visit(ir_if *); virtual void visit(ir_emit_vertex *); virtual void visit(ir_end_primitive *); + virtual void visit(ir_barrier *); /*@}*/ src_reg result; @@ -178,10 +177,10 @@ public: struct hash_table *variable_ht; - bool run(void); + bool run(gl_clip_plane *clip_planes); void fail(const char *msg, ...); - void setup_uniform_clipplane_values(); + void setup_uniform_clipplane_values(gl_clip_plane *clip_planes); void setup_uniform_values(ir_variable *ir); void setup_builtin_uniform_values(ir_variable *ir); int setup_uniforms(int payload_reg); @@ -344,8 +343,7 @@ public: void emit_shader_time_begin(); void emit_shader_time_end(); - void emit_shader_time_write(enum shader_time_shader_type type, - src_reg value); + void emit_shader_time_write(int shader_time_subindex, src_reg value); void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, dst_reg dst, src_reg offset, src_reg src0, @@ -412,9 +410,7 @@ private: */ const bool no_spills; - const shader_time_shader_type st_base; - const shader_time_shader_type st_written; - const shader_time_shader_type st_reset; + int shader_time_index; }; @@ -426,7 +422,7 @@ private: class vec4_generator { public: - vec4_generator(struct brw_context *brw, + vec4_generator(const struct brw_compiler *compiler, void *log_data, struct gl_shader_program *shader_prog, struct gl_program *prog, struct brw_vue_prog_data *prog_data, @@ -508,7 +504,9 @@ private: struct brw_reg dst); void generate_unpack_flags(struct brw_reg dst); - struct brw_context *brw; + const struct brw_compiler *compiler; + void *log_data; /* Passed to compiler->*_log functions */ + const struct brw_device_info *devinfo; struct brw_codegen *p; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp index 9147c3cbb79..c9fe0cebf27 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp @@ -114,8 +114,16 @@ instructions_match(vec4_instruction *a, vec4_instruction *b) { return a->opcode == b->opcode && a->saturate == b->saturate && + a->predicate == b->predicate && + a->predicate_inverse == b->predicate_inverse && a->conditional_mod == b->conditional_mod && + a->flag_subreg == b->flag_subreg && a->dst.type == b->dst.type && + a->offset == b->offset && + a->mlen == b->mlen && + a->base_mrf == b->base_mrf && + a->header_size == b->header_size && + a->shadow_compare == b->shadow_compare && a->dst.writemask == b->dst.writemask && a->force_writemask_all == b->force_writemask_all && a->regs_written == b->regs_written && diff --git 
a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index ef77b8df051..d2de2f0be25 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -134,7 +134,8 @@ vec4_instruction::get_src(const struct brw_vue_prog_data *prog_data, int i) return brw_reg; } -vec4_generator::vec4_generator(struct brw_context *brw, +vec4_generator::vec4_generator(const struct brw_compiler *compiler, + void *log_data, struct gl_shader_program *shader_prog, struct gl_program *prog, struct brw_vue_prog_data *prog_data, @@ -142,13 +143,13 @@ vec4_generator::vec4_generator(struct brw_context *brw, bool debug_flag, const char *stage_name, const char *stage_abbrev) - : brw(brw), devinfo(brw->intelScreen->devinfo), + : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo), shader_prog(shader_prog), prog(prog), prog_data(prog_data), mem_ctx(mem_ctx), stage_name(stage_name), stage_abbrev(stage_abbrev), debug_flag(debug_flag) { p = rzalloc(mem_ctx, struct brw_codegen); - brw_init_codegen(brw->intelScreen->devinfo, p, mem_ctx); + brw_init_codegen(devinfo, p, mem_ctx); } vec4_generator::~vec4_generator() @@ -398,30 +399,25 @@ vec4_generator::generate_tex(vec4_instruction *inst, brw_mark_surface_used(&prog_data->base, sampler + base_binding_table_index); } else { /* Non-constant sampler index. */ - /* Note: this clobbers `dst` as a temporary before emitting the send */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); - struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* Some care required: `sampler` and `temp` may alias: - * addr = sampler & 0xff - * temp = (sampler << 8) & 0xf00 - * addr = addr | temp - */ - brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index)); - brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u)); - brw_AND(p, temp, temp, brw_imm_ud(0x0f00)); - brw_AND(p, addr, addr, brw_imm_ud(0x0ff)); - brw_OR(p, addr, addr, temp); + /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ + brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + if (base_binding_table_index) + brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); + brw_AND(p, addr, addr, brw_imm_ud(0xfff)); brw_pop_insn_state(p); + if (inst->base_mrf != -1) + gen6_resolve_implied_move(p, &src, inst->base_mrf); + /* dst = send(offset, a0.0 | <descriptor>) */ brw_inst *insn = brw_send_indirect_message( p, BRW_SFID_SAMPLER, dst, src, addr); @@ -1631,16 +1627,11 @@ vec4_generator::generate_code(const cfg_t *cfg) ralloc_free(annotation.ann); } - static GLuint msg_id = 0; - _mesa_gl_debug(&brw->ctx, &msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_OTHER, - MESA_DEBUG_SEVERITY_NOTIFICATION, - "%s vec4 shader: %d inst, %d loops, " - "compacted %d to %d bytes.\n", - stage_abbrev, - before_size / 16, loop_count, - before_size, after_size); + compiler->shader_debug_log(log_data, + "%s vec4 shader: %d inst, %d loops, " + "compacted %d to %d bytes.\n", + stage_abbrev, before_size / 16, loop_count, + before_size, after_size); } const unsigned * diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 363e30e34e4..69bcf5afc51 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -34,15 +34,15 @@ const unsigned MAX_GS_INPUT_VERTICES = 6; namespace brw { -vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw, +vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler, struct brw_gs_compile *c, struct gl_shader_program *prog, void *mem_ctx, - bool no_spills) - : vec4_visitor(brw, &c->base, &c->gp->program.Base, &c->key.base, + bool no_spills, + int shader_time_index) + : vec4_visitor(compiler, &c->base, &c->gp->program.Base, &c->key.base, &c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx, - no_spills, - ST_GS, ST_GS_WRITTEN, ST_GS_RESET), + no_spills, shader_time_index), c(c) { } @@ -106,7 +106,7 @@ vec4_gs_visitor::setup_payload() * to be interleaved, so one register contains two attribute slots. */ int attributes_per_reg = - c->prog_data.dispatch_mode == GEN7_GS_DISPATCH_MODE_DUAL_OBJECT ? 1 : 2; + c->prog_data.base.dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2; /* If a geometry shader tries to read from an input that wasn't written by * the vertex shader, that produces undefined results, but it shouldn't @@ -629,7 +629,8 @@ generate_assembly(struct brw_context *brw, const cfg_t *cfg, unsigned *final_assembly_size) { - vec4_generator g(brw, shader_prog, prog, prog_data, mem_ctx, + vec4_generator g(brw->intelScreen->compiler, brw, + shader_prog, prog, prog_data, mem_ctx, INTEL_DEBUG & DEBUG_GS, "geometry", "GS"); return g.generate_assembly(cfg, final_assembly_size); } @@ -648,6 +649,10 @@ brw_gs_emit(struct brw_context *brw, brw_dump_ir("geometry", prog, &shader->base, NULL); } + int st_index = -1; + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + st_index = brw_get_shader_time_index(brw, prog, NULL, ST_GS); + if (brw->gen >= 7) { /* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do * so without spilling. If the GS invocations count > 1, then we can't use @@ -655,10 +660,11 @@ brw_gs_emit(struct brw_context *brw, */ if (c->prog_data.invocations <= 1 && likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) { - c->prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_DUAL_OBJECT; + c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; - vec4_gs_visitor v(brw, c, prog, mem_ctx, true /* no_spills */); - if (v.run()) { + vec4_gs_visitor v(brw->intelScreen->compiler, + c, prog, mem_ctx, true /* no_spills */, st_index); + if (v.run(NULL /* clip planes */)) { return generate_assembly(brw, prog, &c->gp->program.Base, &c->prog_data.base, mem_ctx, v.cfg, final_assembly_size); @@ -690,19 +696,23 @@ brw_gs_emit(struct brw_context *brw, * SINGLE mode. 
*/ if (c->prog_data.invocations <= 1 || brw->gen < 7) - c->prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_SINGLE; + c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE; else - c->prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE; + c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE; vec4_gs_visitor *gs = NULL; const unsigned *ret = NULL; if (brw->gen >= 7) - gs = new vec4_gs_visitor(brw, c, prog, mem_ctx, false /* no_spills */); + gs = new vec4_gs_visitor(brw->intelScreen->compiler, + c, prog, mem_ctx, false /* no_spills */, + st_index); else - gs = new gen6_gs_visitor(brw, c, prog, mem_ctx, false /* no_spills */); + gs = new gen6_gs_visitor(brw->intelScreen->compiler, + c, prog, mem_ctx, false /* no_spills */, + st_index); - if (!gs->run()) { + if (!gs->run(NULL /* clip planes */)) { prog->LinkStatus = false; ralloc_strcat(&prog->InfoLog, gs->fail_msg); } else { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h index bcb5a2bcfc1..e693c56b58f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h @@ -68,11 +68,12 @@ namespace brw { class vec4_gs_visitor : public vec4_visitor { public: - vec4_gs_visitor(struct brw_context *brw, + vec4_gs_visitor(const struct brw_compiler *compiler, struct brw_gs_compile *c, struct gl_shader_program *prog, void *mem_ctx, - bool no_spills); + bool no_spills, + int shader_time_index); protected: virtual dst_reg *make_reg_for_system_value(ir_variable *ir); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 5368a75bc0f..555c42e2f24 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -191,7 +191,6 @@ vec4_visitor::setup_payload_interference(struct ra_graph *g, bool vec4_visitor::reg_allocate() { - struct brw_compiler *compiler = brw->intelScreen->compiler; unsigned int hw_reg_mapping[alloc.count]; int payload_reg_count = this->first_non_payload_grf; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index e51c140c0f2..236fa51f92c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -684,9 +684,12 @@ vec4_visitor::setup_uniform_values(ir_variable *ir) * order we'd walk the type, so walk the list of storage and find anything * with our name, or the prefix of a component that starts with our name. */ - for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) { + for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) { struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; + if (storage->builtin) + continue; + if (strncmp(ir->name, storage->name, namelen) != 0 || (storage->name[namelen] != 0 && storage->name[namelen] != '.' 
&& @@ -718,10 +721,8 @@ vec4_visitor::setup_uniform_values(ir_variable *ir) } void -vec4_visitor::setup_uniform_clipplane_values() +vec4_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes) { - gl_clip_plane *clip_planes = brw_select_clip_planes(ctx); - for (int i = 0; i < key->nr_userclip_plane_consts; ++i) { assert(this->uniforms < uniform_array_size); this->uniform_vector_size[this->uniforms] = 4; @@ -2461,11 +2462,27 @@ vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS, dst_reg(this, glsl_type::uvec4_type)); inst->base_mrf = 2; - inst->mlen = 1; inst->src[1] = sampler; + int param_base; + + if (devinfo->gen >= 9) { + /* Gen9+ needs a message header in order to use SIMD4x2 mode */ + vec4_instruction *header_inst = new(mem_ctx) + vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9, + dst_reg(MRF, inst->base_mrf)); + + emit(header_inst); + + inst->mlen = 2; + inst->header_size = 1; + param_base = inst->base_mrf + 1; + } else { + inst->mlen = 1; + param_base = inst->base_mrf; + } + /* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */ - int param_base = inst->base_mrf; int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1; int zero_mask = 0xf & ~coord_mask; @@ -2949,6 +2966,12 @@ vec4_visitor::visit(ir_end_primitive *) } void +vec4_visitor::visit(ir_barrier *) +{ + unreachable("not reached"); +} + +void vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, dst_reg dst, src_reg offset, src_reg src0, src_reg src1) @@ -3655,7 +3678,7 @@ vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg) *reg = neg_result; } -vec4_visitor::vec4_visitor(struct brw_context *brw, +vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, struct brw_vec4_compile *c, struct gl_program *prog, const struct brw_vue_prog_key *key, @@ -3664,10 +3687,9 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, gl_shader_stage stage, void *mem_ctx, bool no_spills, - shader_time_shader_type st_base, - shader_time_shader_type st_written, - shader_time_shader_type st_reset) - : backend_visitor(brw, shader_prog, prog, &prog_data->base, stage), + int shader_time_index) + : backend_shader(compiler, NULL, mem_ctx, + shader_prog, prog, &prog_data->base, stage), c(c), key(key), prog_data(prog_data), @@ -3676,11 +3698,8 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, first_non_payload_grf(0), need_all_constants_in_pull_buffer(false), no_spills(no_spills), - st_base(st_base), - st_written(st_written), - st_reset(st_reset) + shader_time_index(shader_time_index) { - this->mem_ctx = mem_ctx; this->failed = false; this->base_ir = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp index 92d108598a2..dcbd2405078 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp @@ -381,8 +381,7 @@ vec4_vs_visitor::emit_program_code() break; default: - _mesa_problem(ctx, "Unsupported opcode %s in vertex program\n", - _mesa_opcode_string(vpi->Opcode)); + assert(!"Unsupported opcode in vertex program"); } /* Copy the temporary back into the actual destination register. 
*/ @@ -574,15 +573,13 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src) break; default: - _mesa_problem(ctx, "bad uniform src register file: %s\n", - _mesa_register_file_name((gl_register_file)src.File)); + assert(!"Bad uniform in src register file"); return src_reg(this, glsl_type::vec4_type); } break; default: - _mesa_problem(ctx, "bad src register file: %s\n", - _mesa_register_file_name((gl_register_file)src.File)); + assert(!"Bad src register file"); return src_reg(this, glsl_type::vec4_type); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index 4baf73ebde1..f93062b46d0 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -23,7 +23,6 @@ #include "brw_vs.h" -#include "main/context.h" namespace brw { @@ -78,7 +77,7 @@ vec4_vs_visitor::emit_prolog() /* ES 3.0 has different rules for converting signed normalized * fixed-point numbers than desktop GL. */ - if (_mesa_is_gles3(ctx) && (wa_flags & BRW_ATTRIB_WA_SIGN)) { + if ((wa_flags & BRW_ATTRIB_WA_SIGN) && !use_legacy_snorm_formula) { /* According to equation 2.2 of the ES 3.0 specification, * signed normalization conversion is done by: * @@ -212,18 +211,21 @@ vec4_vs_visitor::emit_thread_end() } -vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw, +vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler, struct brw_vs_compile *vs_compile, struct brw_vs_prog_data *vs_prog_data, struct gl_shader_program *prog, - void *mem_ctx) - : vec4_visitor(brw, &vs_compile->base, &vs_compile->vp->program.Base, + void *mem_ctx, + int shader_time_index, + bool use_legacy_snorm_formula) + : vec4_visitor(compiler, &vs_compile->base, &vs_compile->vp->program.Base, &vs_compile->key.base, &vs_prog_data->base, prog, MESA_SHADER_VERTEX, mem_ctx, false /* no_spills */, - ST_VS, ST_VS_WRITTEN, ST_VS_RESET), + shader_time_index), vs_compile(vs_compile), - vs_prog_data(vs_prog_data) + vs_prog_data(vs_prog_data), + use_legacy_snorm_formula(use_legacy_snorm_formula) { } diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index d03567e33b8..6e9848fb1e9 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -40,108 +40,6 @@ #include "util/ralloc.h" -static inline void assign_vue_slot(struct brw_vue_map *vue_map, - int varying) -{ - /* Make sure this varying hasn't been assigned a slot already */ - assert (vue_map->varying_to_slot[varying] == -1); - - vue_map->varying_to_slot[varying] = vue_map->num_slots; - vue_map->slot_to_varying[vue_map->num_slots++] = varying; -} - -/** - * Compute the VUE map for vertex shader program. - */ -void -brw_compute_vue_map(const struct brw_device_info *devinfo, - struct brw_vue_map *vue_map, - GLbitfield64 slots_valid) -{ - vue_map->slots_valid = slots_valid; - int i; - - /* gl_Layer and gl_ViewportIndex don't get their own varying slots -- they - * are stored in the first VUE slot (VARYING_SLOT_PSIZ). - */ - slots_valid &= ~(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT); - - /* Make sure that the values we store in vue_map->varying_to_slot and - * vue_map->slot_to_varying won't overflow the signed chars that are used - * to store them. Note that since vue_map->slot_to_varying sometimes holds - * values equal to BRW_VARYING_SLOT_COUNT, we need to ensure that - * BRW_VARYING_SLOT_COUNT is <= 127, not 128. 
- */ - STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127); - - vue_map->num_slots = 0; - for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) { - vue_map->varying_to_slot[i] = -1; - vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_COUNT; - } - - /* VUE header: format depends on chip generation and whether clipping is - * enabled. - */ - if (devinfo->gen < 6) { - /* There are 8 dwords in VUE header pre-Ironlake: - * dword 0-3 is indices, point width, clip flags. - * dword 4-7 is ndc position - * dword 8-11 is the first vertex data. - * - * On Ironlake the VUE header is nominally 20 dwords, but the hardware - * will accept the same header layout as Gen4 [and should be a bit faster] - */ - assign_vue_slot(vue_map, VARYING_SLOT_PSIZ); - assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC); - assign_vue_slot(vue_map, VARYING_SLOT_POS); - } else { - /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: - * dword 0-3 of the header is indices, point width, clip flags. - * dword 4-7 is the 4D space position - * dword 8-15 of the vertex header is the user clip distance if - * enabled. - * dword 8-11 or 16-19 is the first vertex element data we fill. - */ - assign_vue_slot(vue_map, VARYING_SLOT_PSIZ); - assign_vue_slot(vue_map, VARYING_SLOT_POS); - if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0)) - assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0); - if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1)) - assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1); - - /* front and back colors need to be consecutive so that we can use - * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing - * two-sided color. - */ - if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL0)) - assign_vue_slot(vue_map, VARYING_SLOT_COL0); - if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC0)) - assign_vue_slot(vue_map, VARYING_SLOT_BFC0); - if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL1)) - assign_vue_slot(vue_map, VARYING_SLOT_COL1); - if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC1)) - assign_vue_slot(vue_map, VARYING_SLOT_BFC1); - } - - /* The hardware doesn't care about the rest of the vertex outputs, so just - * assign them contiguously. Don't reassign outputs that already have a - * slot. - * - * We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX, - * since it's encoded as the clip distances by emit_clip_distances(). - * However, it may be output by transform feedback, and we'd rather not - * recompute state when TF changes, so we just always include it. - */ - for (int i = 0; i < VARYING_SLOT_MAX; ++i) { - if ((slots_valid & BITFIELD64_BIT(i)) && - vue_map->varying_to_slot[i] == -1) { - assign_vue_slot(vue_map, i); - } - } -} - - /** * Decide which set of clip planes should be used when clipping via * gl_Position or gl_ClipVertex. 
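The non-constant sampler path rewritten in brw_vec4_generator.cpp above relies on a small arithmetic trick: for an 8-bit sampler index, sampler * 0x101 equals (sampler << 8) | sampler, so a single MUL plants one copy in the binding-table-index byte (bits 0-7, after adding base_binding_table_index) and one in the sampler-index nibble (bits 8-11) of the descriptor built in a0.0. A standalone check of that claim against the removed SHL/AND/OR sequence, using illustrative in-range inputs rather than real hardware state:

/*
 * Illustrative only -- not Mesa source.  old_form() mirrors the removed
 * ADD/SHL/AND/OR sequence, new_form() the single MUL/ADD/AND replacement.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t
old_form(uint32_t sampler, uint32_t base)
{
   uint32_t addr = (sampler + base) & 0x0ff;   /* binding table index byte */
   uint32_t temp = (sampler << 8) & 0x0f00;    /* sampler index nibble */
   return addr | temp;
}

static uint32_t
new_form(uint32_t sampler, uint32_t base)
{
   return ((sampler * 0x101) + base) & 0xfff;
}

int main(void)
{
   /* In-range case: sampler < 16 and sampler + base fits in one byte. */
   for (uint32_t sampler = 0; sampler < 16; sampler++)
      for (uint32_t base = 0; base + sampler < 256; base++)
         assert(old_form(sampler, base) == new_form(sampler, base));
   printf("old and new descriptor math agree for in-range inputs\n");
   return 0;
}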
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 6157ae6ffa9..61f9b006a58 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -90,11 +90,13 @@ namespace brw { class vec4_vs_visitor : public vec4_visitor { public: - vec4_vs_visitor(struct brw_context *brw, + vec4_vs_visitor(const struct brw_compiler *compiler, struct brw_vs_compile *vs_compile, struct brw_vs_prog_data *vs_prog_data, struct gl_shader_program *prog, - void *mem_ctx); + void *mem_ctx, + int shader_time_index, + bool use_legacy_snorm_formula); protected: virtual dst_reg *make_reg_for_system_value(ir_variable *ir); @@ -115,6 +117,8 @@ private: struct brw_vs_prog_data * const vs_prog_data; src_reg *vp_temp_regs; src_reg vp_addr_reg; + + bool use_legacy_snorm_formula; }; } /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index f82a62b4851..b2f91bd412b 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -121,7 +121,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw) /* BRW_NEW_VS_PROG_DATA */ const struct brw_stage_prog_data *prog_data = &brw->vs.prog_data->base.base; - dword_pitch = brw->vs.prog_data->base.simd8; + dword_pitch = brw->vs.prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8; /* _NEW_PROGRAM_CONSTANTS */ brw_upload_pull_constants(brw, BRW_NEW_VS_CONSTBUF, &vp->program.Base, @@ -151,7 +151,7 @@ brw_upload_vs_ubo_surfaces(struct brw_context *brw) return; /* BRW_NEW_VS_PROG_DATA */ - dword_pitch = brw->vs.prog_data->base.simd8; + dword_pitch = brw->vs.prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8; brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX], &brw->vs.base, &brw->vs.prog_data->base.base, dword_pitch); diff --git a/src/mesa/drivers/dri/i965/brw_vue_map.c b/src/mesa/drivers/dri/i965/brw_vue_map.c new file mode 100644 index 00000000000..76875789ba8 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vue_map.c @@ -0,0 +1,148 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file brw_vue_map.c + * + * This file computes the "VUE map" for a (non-fragment) shader stage, which + * describes the layout of its output varyings. The VUE map is used to match + * outputs from one stage with the inputs of the next. 
+ * + * Largely, varyings can be placed however we like - producers/consumers simply + * have to agree on the layout. However, there is also a "VUE Header" that + * prescribes a fixed-layout for items that interact with fixed function + * hardware, such as the clipper and rasterizer. + * + * Authors: + * Paul Berry <[email protected]> + * Chris Forbes <[email protected]> + * Eric Anholt <[email protected]> + */ + + +#include "main/compiler.h" +#include "brw_context.h" + +static inline void +assign_vue_slot(struct brw_vue_map *vue_map, int varying) +{ + /* Make sure this varying hasn't been assigned a slot already */ + assert (vue_map->varying_to_slot[varying] == -1); + + vue_map->varying_to_slot[varying] = vue_map->num_slots; + vue_map->slot_to_varying[vue_map->num_slots++] = varying; +} + +/** + * Compute the VUE map for a shader stage. + */ +void +brw_compute_vue_map(const struct brw_device_info *devinfo, + struct brw_vue_map *vue_map, + GLbitfield64 slots_valid) +{ + vue_map->slots_valid = slots_valid; + int i; + + /* gl_Layer and gl_ViewportIndex don't get their own varying slots -- they + * are stored in the first VUE slot (VARYING_SLOT_PSIZ). + */ + slots_valid &= ~(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT); + + /* Make sure that the values we store in vue_map->varying_to_slot and + * vue_map->slot_to_varying won't overflow the signed chars that are used + * to store them. Note that since vue_map->slot_to_varying sometimes holds + * values equal to BRW_VARYING_SLOT_COUNT, we need to ensure that + * BRW_VARYING_SLOT_COUNT is <= 127, not 128. + */ + STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127); + + vue_map->num_slots = 0; + for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) { + vue_map->varying_to_slot[i] = -1; + vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_COUNT; + } + + /* VUE header: format depends on chip generation and whether clipping is + * enabled. + * + * See the Sandybridge PRM, Volume 2 Part 1, section 1.5.1 (page 30), + * "Vertex URB Entry (VUE) Formats" which describes the VUE header layout. + */ + if (devinfo->gen < 6) { + /* There are 8 dwords in VUE header pre-Ironlake: + * dword 0-3 is indices, point width, clip flags. + * dword 4-7 is ndc position + * dword 8-11 is the first vertex data. + * + * On Ironlake the VUE header is nominally 20 dwords, but the hardware + * will accept the same header layout as Gen4 [and should be a bit faster] + */ + assign_vue_slot(vue_map, VARYING_SLOT_PSIZ); + assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC); + assign_vue_slot(vue_map, VARYING_SLOT_POS); + } else { + /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: + * dword 0-3 of the header is indices, point width, clip flags. + * dword 4-7 is the 4D space position + * dword 8-15 of the vertex header is the user clip distance if + * enabled. + * dword 8-11 or 16-19 is the first vertex element data we fill. + */ + assign_vue_slot(vue_map, VARYING_SLOT_PSIZ); + assign_vue_slot(vue_map, VARYING_SLOT_POS); + if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0)) + assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0); + if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1)) + assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1); + + /* front and back colors need to be consecutive so that we can use + * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing + * two-sided color. 
+ */ + if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL0)) + assign_vue_slot(vue_map, VARYING_SLOT_COL0); + if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC0)) + assign_vue_slot(vue_map, VARYING_SLOT_BFC0); + if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL1)) + assign_vue_slot(vue_map, VARYING_SLOT_COL1); + if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC1)) + assign_vue_slot(vue_map, VARYING_SLOT_BFC1); + } + + /* The hardware doesn't care about the rest of the vertex outputs, so just + * assign them contiguously. Don't reassign outputs that already have a + * slot. + * + * We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX, + * since it's encoded as the clip distances by emit_clip_distances(). + * However, it may be output by transform feedback, and we'd rather not + * recompute state when TF changes, so we just always include it. + */ + for (int i = 0; i < VARYING_SLOT_MAX; ++i) { + if ((slots_valid & BITFIELD64_BIT(i)) && + vue_map->varying_to_slot[i] == -1) { + assign_vue_slot(vue_map, i); + } + } +} diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 5496225a6c7..4619ce1080d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -36,6 +36,7 @@ #include "main/formats.h" #include "main/fbobject.h" #include "main/samplerobj.h" +#include "main/framebuffer.h" #include "program/prog_parameter.h" #include "program/program.h" #include "intel_mipmap_tree.h" @@ -462,7 +463,7 @@ static void brw_wm_populate_key( struct brw_context *brw, GLuint lookup = 0; GLuint line_aa; bool program_uses_dfdy = fp->program.UsesDFdy; - bool multisample_fbo = ctx->DrawBuffer->Visual.samples > 1; + const bool multisample_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; memset(key, 0, sizeof(*key)); @@ -561,7 +562,7 @@ static void brw_wm_populate_key( struct brw_context *brw, * drawable height in order to invert the Y axis. 
*/ if (fp->program.Base.InputsRead & VARYING_BIT_POS) { - key->drawable_height = ctx->DrawBuffer->Height; + key->drawable_height = _mesa_geometric_height(ctx->DrawBuffer); } if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) { @@ -580,7 +581,7 @@ static void brw_wm_populate_key( struct brw_context *brw, key->persample_shading = _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1; if (key->persample_shading) - key->persample_2x = ctx->DrawBuffer->Visual.samples == 2; + key->persample_2x = _mesa_geometric_samples(ctx->DrawBuffer) == 2; key->compute_pos_offset = _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 && diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 160dd2f6c62..72aad96bb6a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -35,6 +35,7 @@ #include "main/mtypes.h" #include "main/samplerobj.h" #include "program/prog_parameter.h" +#include "main/framebuffer.h" #include "intel_mipmap_tree.h" #include "intel_batchbuffer.h" @@ -738,6 +739,9 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw, uint32_t *surf_offset) { GLuint i; + const unsigned int w = _mesa_geometric_width(fb); + const unsigned int h = _mesa_geometric_height(fb); + const unsigned int s = _mesa_geometric_samples(fb); /* Update surfaces for drawing buffers */ if (fb->_NumColorDrawBuffers >= 1) { @@ -748,17 +752,15 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw, surf_offset[surf_index] = brw->vtbl.update_renderbuffer_surface( brw, fb->_ColorDrawBuffers[i], - fb->MaxNumLayers > 0, i, surf_index); + _mesa_geometric_layers(fb) > 0, i, surf_index); } else { - brw->vtbl.emit_null_surface_state( - brw, fb->Width, fb->Height, fb->Visual.samples, + brw->vtbl.emit_null_surface_state(brw, w, h, s, &surf_offset[surf_index]); } } } else { const uint32_t surf_index = render_target_start; - brw->vtbl.emit_null_surface_state( - brw, fb->Width, fb->Height, fb->Visual.samples, + brw->vtbl.emit_null_surface_state(brw, w, h, s, &surf_offset[surf_index]); } } diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index aaf90df2b9c..9a29366f0e0 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -31,6 +31,7 @@ #include "brw_util.h" #include "intel_batchbuffer.h" #include "main/fbobject.h" +#include "main/framebuffer.h" static void upload_clip_state(struct brw_context *brw) @@ -145,11 +146,14 @@ upload_clip_state(struct brw_context *brw) * the viewport, so we can ignore this restriction. */ if (brw->gen < 8) { + const float fb_width = (float)_mesa_geometric_width(fb); + const float fb_height = (float)_mesa_geometric_height(fb); + for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { if (ctx->ViewportArray[i].X != 0 || ctx->ViewportArray[i].Y != 0 || - ctx->ViewportArray[i].Width != (float) fb->Width || - ctx->ViewportArray[i].Height != (float) fb->Height) { + ctx->ViewportArray[i].Width != fb_width || + ctx->ViewportArray[i].Height != fb_height) { dw2 &= ~GEN6_CLIP_GB_TEST; break; } @@ -179,7 +183,7 @@ upload_clip_state(struct brw_context *brw) dw2); OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | - (fb->MaxNumLayers > 0 ? 0 : GEN6_CLIP_FORCE_ZERO_RTAINDEX) | + (_mesa_geometric_layers(fb) > 0 ? 
0 : GEN6_CLIP_FORCE_ZERO_RTAINDEX) | ((ctx->Const.MaxViewports - 1) & GEN6_CLIP_MAX_VP_INDEX_MASK)); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h index 28f23c9e4f7..27254ebb727 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h @@ -35,12 +35,13 @@ namespace brw { class gen6_gs_visitor : public vec4_gs_visitor { public: - gen6_gs_visitor(struct brw_context *brw, + gen6_gs_visitor(const struct brw_compiler *comp, struct brw_gs_compile *c, struct gl_shader_program *prog, void *mem_ctx, - bool no_spills) : - vec4_gs_visitor(brw, c, prog, mem_ctx, no_spills) {} + bool no_spills, + int shader_time_index) : + vec4_gs_visitor(comp, c, prog, mem_ctx, no_spills, shader_time_index) {} protected: virtual void assign_binding_table_offsets(); diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c index ec46479ff75..36734f598fe 100644 --- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c +++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c @@ -26,6 +26,7 @@ #include "brw_context.h" #include "brw_defines.h" #include "brw_multisample_state.h" +#include "main/framebuffer.h" void gen6_get_sample_position(struct gl_context *ctx, @@ -34,7 +35,7 @@ gen6_get_sample_position(struct gl_context *ctx, { uint8_t bits; - switch (fb->Visual.samples) { + switch (_mesa_geometric_samples(fb)) { case 1: result[0] = result[1] = 0.5f; return; diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c index 6431ed56d81..ba5c944fb3d 100644 --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c @@ -246,7 +246,7 @@ gen6_queryobj_get_results(struct gl_context *ctx, * and correctly emitted the number of pixel shader invocations, but, * whomever forgot to undo the multiply by 4. 
*/ - if (brw->gen >= 8 || brw->is_haswell) + if (brw->gen == 8 || brw->is_haswell) query->Base.Result /= 4; break; diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c index 0111f152ef6..17b4a7fba96 100644 --- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -39,6 +39,8 @@ gen6_upload_scissor_state(struct brw_context *brw) const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); struct gen6_scissor_rect *scissor; uint32_t scissor_state_offset; + const unsigned int fb_width= _mesa_geometric_width(ctx->DrawBuffer); + const unsigned int fb_height = _mesa_geometric_height(ctx->DrawBuffer); scissor = brw_state_batch(brw, AUB_TRACE_SCISSOR_STATE, sizeof(*scissor) * ctx->Const.MaxViewports, 32, @@ -56,7 +58,11 @@ gen6_upload_scissor_state(struct brw_context *brw) for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { int bbox[4]; - _mesa_scissor_bounding_box(ctx, ctx->DrawBuffer, i, bbox); + bbox[0] = 0; + bbox[1] = fb_width; + bbox[2] = 0; + bbox[3] = fb_height; + _mesa_intersect_scissor_bounding_box(ctx, i, bbox); if (bbox[0] == bbox[1] || bbox[2] == bbox[3]) { /* If the scissor was out of bounds and got clamped to 0 width/height @@ -80,8 +86,8 @@ gen6_upload_scissor_state(struct brw_context *brw) /* memory: Y=0=top */ scissor[i].xmin = bbox[0]; scissor[i].xmax = bbox[1] - 1; - scissor[i].ymin = ctx->DrawBuffer->Height - bbox[3]; - scissor[i].ymax = ctx->DrawBuffer->Height - bbox[2] - 1; + scissor[i].ymin = fb_height - bbox[3]; + scissor[i].ymax = fb_height - bbox[2] - 1; } } BEGIN_BATCH(2); diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index e445ce25600..b00517ed81e 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -31,6 +31,7 @@ #include "brw_util.h" #include "main/macros.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "intel_batchbuffer.h" /** @@ -273,7 +274,7 @@ upload_sf_state(struct brw_context *brw) int i; /* _NEW_BUFFER */ bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); - bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1; + const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; float point_size; @@ -361,31 +362,7 @@ upload_sf_state(struct brw_context *brw) /* _NEW_LINE */ { - /* OpenGL dictates that line width should be rounded to the nearest - * integer - */ - float line_width = - roundf(CLAMP(ctx->Line.Width, 0.0, ctx->Const.MaxLineWidth)); - uint32_t line_width_u3_7 = U_FIXED(line_width, 7); - - /* Line width of 0 is not allowed when MSAA enabled */ - if (ctx->Multisample._Enabled) { - if (line_width_u3_7 == 0) - line_width_u3_7 = 1; - } else if (ctx->Line.SmoothFlag && ctx->Line.Width < 1.5) { - /* For 1 pixel line thickness or less, the general - * anti-aliasing algorithm gives up, and a garbage line is - * generated. Setting a Line Width of 0.0 specifies the - * rasterization of the "thinnest" (one-pixel-wide), - * non-antialiased lines. - * - * Lines rendered with zero Line Width are rasterized using - * Grid Intersection Quantization rules as specified by - * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line - * Rasterization. 
- */ - line_width_u3_7 = 0; - } + uint32_t line_width_u3_7 = brw_get_line_width(brw); dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT; } if (ctx->Line.SmoothFlag) { diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index 2fb0182c56e..7c8d8849f4e 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -30,6 +30,7 @@ #include "brw_defines.h" #include "intel_batchbuffer.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "main/viewport.h" /* The clip VP defines the guardband region where expensive clipping is skipped @@ -93,10 +94,10 @@ gen6_upload_sf_vp(struct brw_context *brw) /* _NEW_BUFFERS */ if (render_to_fbo) { y_scale = 1.0; - y_bias = 0; + y_bias = 0.0; } else { y_scale = -1.0; - y_bias = ctx->DrawBuffer->Height; + y_bias = (float)_mesa_geometric_height(ctx->DrawBuffer); } for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 7081eb73428..d1748ba7457 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -33,6 +33,7 @@ #include "program/program.h" #include "program/prog_parameter.h" #include "program/prog_statevars.h" +#include "main/framebuffer.h" #include "intel_batchbuffer.h" static void @@ -284,7 +285,7 @@ upload_wm_state(struct brw_context *brw) const struct brw_wm_prog_data *prog_data = brw->wm.prog_data; /* _NEW_BUFFERS */ - const bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1; + const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16 * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index e1c4f8b5d14..8d6d3fe1d34 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -112,7 +112,7 @@ upload_gs_state(struct brw_context *brw) GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | ((brw->gs.prog_data->invocations - 1) << GEN7_GS_INSTANCE_CONTROL_SHIFT) | - brw->gs.prog_data->dispatch_mode | + SET_FIELD(prog_data->dispatch_mode, GEN7_GS_DISPATCH_MODE) | GEN6_GS_STATISTICS_ENABLE | (brw->gs.prog_data->include_primitive_id ? 
GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) | diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 58e33370c57..4fa46a8eb97 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -27,6 +27,7 @@ #include "brw_util.h" #include "main/macros.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "intel_batchbuffer.h" static void @@ -109,7 +110,7 @@ upload_sf_state(struct brw_context *brw) float point_size; /* _NEW_BUFFERS */ bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); - bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1; + const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; dw1 = GEN6_SF_STATISTICS_ENABLE; @@ -192,30 +193,7 @@ upload_sf_state(struct brw_context *brw) /* _NEW_LINE */ { - /* OpenGL dictates that line width should be rounded to the nearest - * integer - */ - float line_width = - roundf(CLAMP(ctx->Line.Width, 0.0, ctx->Const.MaxLineWidth)); - uint32_t line_width_u3_7 = U_FIXED(line_width, 7); - /* Line width of 0 is not allowed when MSAA enabled */ - if (ctx->Multisample._Enabled) { - if (line_width_u3_7 == 0) - line_width_u3_7 = 1; - } else if (ctx->Line.SmoothFlag && ctx->Line.Width < 1.5) { - /* For 1 pixel line thickness or less, the general - * anti-aliasing algorithm gives up, and a garbage line is - * generated. Setting a Line Width of 0.0 specifies the - * rasterization of the "thinnest" (one-pixel-wide), - * non-antialiased lines. - * - * Lines rendered with zero Line Width are rasterized using - * Grid Intersection Quantization rules as specified by - * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line - * Rasterization. - */ - line_width_u3_7 = 0; - } + uint32_t line_width_u3_7 = brw_get_line_width(brw); dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT; } if (ctx->Line.SmoothFlag) { diff --git a/src/mesa/drivers/dri/i965/gen7_viewport_state.c b/src/mesa/drivers/dri/i965/gen7_viewport_state.c index eb596845b72..b655205ec35 100644 --- a/src/mesa/drivers/dri/i965/gen7_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen7_viewport_state.c @@ -26,6 +26,7 @@ #include "brw_defines.h" #include "intel_batchbuffer.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "main/viewport.h" static void @@ -45,10 +46,10 @@ gen7_upload_sf_clip_viewport(struct brw_context *brw) /* _NEW_BUFFERS */ if (render_to_fbo) { y_scale = 1.0; - y_bias = 0; + y_bias = 0.0; } else { y_scale = -1.0; - y_bias = ctx->DrawBuffer->Height; + y_bias = (float)_mesa_geometric_height(ctx->DrawBuffer); } for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 278b3ec6d21..4b17d06fa83 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -43,18 +43,52 @@ gen7_upload_constant_state(struct brw_context *brw, int dwords = brw->gen >= 8 ? 11 : 7; BEGIN_BATCH(dwords); OUT_BATCH(opcode << 16 | (dwords - 2)); - OUT_BATCH(active ? stage_state->push_const_size : 0); - OUT_BATCH(0); + + /* Workaround for SKL+ (we use option #2 until we have a need for more + * constant buffers). This comes from the documentation for 3DSTATE_CONSTANT_* + * + * The driver must ensure The following case does not occur without a flush + * to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length equal to + * zero committed followed by a 3DSTATE_CONSTANT_* with buffer 0 read length + * not equal to zero committed. 
Possible ways to avoid this condition + * include: + * 1. always force buffer 3 to have a non zero read length + * 2. always force buffer 0 to a zero read length + */ + if (brw->gen >= 9 && active) { + OUT_BATCH(0); + OUT_BATCH(stage_state->push_const_size); + } else { + OUT_BATCH(active ? stage_state->push_const_size : 0); + OUT_BATCH(0); + } /* Pointer to the constant buffer. Covered by the set of state flags * from gen6_prepare_wm_contants */ - OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - if (brw->gen >= 8) { + if (brw->gen >= 9 && active) { + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + /* XXX: When using buffers other than 0, you need to specify the + * graphics virtual address regardless of INSPM/debug bits + */ + OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER, 0, + stage_state->push_const_offset); OUT_BATCH(0); OUT_BATCH(0); + } else if (brw->gen>= 8) { + OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + } else { + OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); + OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); } diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index b9182758852..ea11ae845e3 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -30,6 +30,7 @@ #include "program/program.h" #include "program/prog_parameter.h" #include "program/prog_statevars.h" +#include "main/framebuffer.h" #include "intel_batchbuffer.h" static void @@ -45,7 +46,7 @@ upload_wm_state(struct brw_context *brw) uint32_t dw1, dw2; /* _NEW_BUFFERS */ - bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1; + const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; dw1 = dw2 = 0; dw1 |= GEN7_WM_STATISTICS_ENABLE; @@ -76,6 +77,10 @@ upload_wm_state(struct brw_context *brw) dw1 |= GEN7_WM_KILL_ENABLE; } + if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx)) { + dw1 |= GEN7_WM_DISPATCH_ENABLE; + } + /* _NEW_BUFFERS | _NEW_COLOR */ if (brw_color_buffer_write_enabled(brw) || writes_depth || dw1 & GEN7_WM_KILL_ENABLE) { diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c index b502650f991..12ac97a5d14 100644 --- a/src/mesa/drivers/dri/i965/gen8_depth_state.c +++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c @@ -417,6 +417,16 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt, uint32_t surface_width = ALIGN(mt->logical_width0, level == 0 ? 8 : 1); uint32_t surface_height = ALIGN(mt->logical_height0, level == 0 ? 4 : 1); + /* From the documentation for 3DSTATE_WM_HZ_OP: "3DSTATE_MULTISAMPLE packet + * must be used prior to this packet to change the Number of Multisamples. + * This packet must not be used to change Number of Multisamples in a + * rendering sequence." + */ + if (brw->num_samples != mt->num_samples) { + gen8_emit_3dstate_multisample(brw, mt->num_samples); + brw->NewGLState |= _NEW_MULTISAMPLE; + } + /* The basic algorithm is: * - If needed, emit 3DSTATE_{DEPTH,HIER_DEPTH,STENCIL}_BUFFER and * 3DSTATE_CLEAR_PARAMS packets to set up the relevant buffers. 
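Several SF state hunks in this commit (gen6_sf_state.c and gen7_sf_state.c above, gen8_sf_state.c below) replace their duplicated line-width code with a shared brw_get_line_width() helper, whose tail is visible in the first hunk of this section. The value it returns is an unsigned 3.7 fixed-point number, i.e. the rounded line width scaled by 2^7. A standalone sketch of that conversion, assuming U_FIXED(x, 7) is a plain truncating multiply by 128 and using 7.0 as a stand-in for ctx->Const.MaxLineWidth; neither assumption is taken from the diff.

/*
 * Illustrative only -- not Mesa source.  Unsigned 3.7 fixed point:
 * 3 integer bits, 7 fractional bits.
 */
#include <math.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t
line_width_u3_7(float gl_line_width)
{
   /* OpenGL dictates rounding the width to the nearest integer first. */
   float w = roundf(fminf(fmaxf(gl_line_width, 0.0f), 7.0f));
   return (uint32_t)(w * (1 << 7));
}

int main(void)
{
   printf("width 1.0 -> 0x%03x\n", line_width_u3_7(1.0f));  /* 0x080 */
   printf("width 2.4 -> 0x%03x\n", line_width_u3_7(2.4f));  /* 0x100 */
   return 0;
}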
diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 46b97131e20..26a02d3b045 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -48,8 +48,7 @@ gen8_upload_gs_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2)); OUT_BATCH(stage_state->prog_offset); OUT_BATCH(0); - OUT_BATCH(GEN6_GS_VECTOR_MASK_ENABLE | - brw->geometry_program->VerticesIn | + OUT_BATCH(brw->geometry_program->VerticesIn | ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_GS_SAMPLER_COUNT_SHIFT) | ((prog_data->base.binding_table.size_bytes / 4) << @@ -59,10 +58,6 @@ gen8_upload_gs_state(struct brw_context *brw) OUT_RELOC64(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, ffs(brw->gs.prog_data->base.base.total_scratch) - 11); - WARN_ONCE(true, - "May need to implement a temporary workaround: GS Number of " - "URB Entries must be less than or equal to the GS Maximum " - "Number of Threads.\n"); } else { OUT_BATCH(0); OUT_BATCH(0); @@ -81,7 +76,8 @@ gen8_upload_gs_state(struct brw_context *brw) uint32_t dw7 = (brw->gs.prog_data->control_data_header_size_hwords << GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | - brw->gs.prog_data->dispatch_mode | + SET_FIELD(prog_data->dispatch_mode, + GEN7_GS_DISPATCH_MODE) | ((brw->gs.prog_data->invocations - 1) << GEN7_GS_INSTANCE_CONTROL_SHIFT) | GEN6_GS_STATISTICS_ENABLE | diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 85ad3b6c551..a88f109c691 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -58,6 +58,9 @@ gen8_upload_ps_extra(struct brw_context *brw, if (prog_data->uses_omask) dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET; + if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx)) + dw1 |= GEN8_PSX_SHADER_HAS_UAV; + BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2)); OUT_BATCH(dw1); @@ -72,7 +75,7 @@ upload_ps_extra(struct brw_context *brw) brw_fragment_program_const(brw->fragment_program); /* BRW_NEW_FS_PROG_DATA */ const struct brw_wm_prog_data *prog_data = brw->wm.prog_data; - /* BRW_NEW_NUM_SAMPLES | _NEW_MULTISAMPLE */ + /* BRW_NEW_NUM_SAMPLES */ const bool multisampled_fbo = brw->num_samples > 1; gen8_upload_ps_extra(brw, &fp->program, prog_data, multisampled_fbo); @@ -80,7 +83,7 @@ upload_ps_extra(struct brw_context *brw) const struct brw_tracked_state gen8_ps_extra = { .dirty = { - .mesa = _NEW_MULTISAMPLE, + .mesa = 0, .brw = BRW_NEW_CONTEXT | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c index 52a21b6a8e8..c2b585d0001 100644 --- a/src/mesa/drivers/dri/i965/gen8_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c @@ -154,14 +154,7 @@ upload_sf(struct brw_context *brw) dw1 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; /* _NEW_LINE */ - /* OpenGL dictates that line width should be rounded to the nearest - * integer - */ - float line_width = - roundf(CLAMP(ctx->Line.Width, 0.0, ctx->Const.MaxLineWidth)); - uint32_t line_width_u3_7 = U_FIXED(line_width, 7); - if (line_width_u3_7 == 0) - line_width_u3_7 = 1; + uint32_t line_width_u3_7 = brw_get_line_width(brw); if (brw->gen >= 9 || brw->is_cherryview) { dw1 |= line_width_u3_7 << GEN9_SF_LINE_WIDTH_SHIFT; } else { diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index d0c2d80b17b..b2d1a579815 100644 --- 
a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -57,6 +57,19 @@ swizzle_to_scs(unsigned swizzle) } static uint32_t +surface_tiling_resource_mode(uint32_t tr_mode) +{ + switch (tr_mode) { + case INTEL_MIPTREE_TRMODE_YF: + return GEN9_SURFACE_TRMODE_TILEYF; + case INTEL_MIPTREE_TRMODE_YS: + return GEN9_SURFACE_TRMODE_TILEYS; + default: + return GEN9_SURFACE_TRMODE_NONE; + } +} + +static uint32_t surface_tiling_mode(uint32_t tiling) { switch (tiling) { @@ -70,8 +83,18 @@ surface_tiling_mode(uint32_t tiling) } static unsigned -vertical_alignment(const struct intel_mipmap_tree *mt) +vertical_alignment(const struct brw_context *brw, + const struct intel_mipmap_tree *mt, + uint32_t surf_type) { + /* On Gen9+ vertical alignment is ignored for 1D surfaces and when + * tr_mode is not TRMODE_NONE. + */ + if (brw->gen > 8 && + (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE || + surf_type == BRW_SURFACE_1D)) + return 0; + switch (mt->align_h) { case 4: return GEN8_SURFACE_VALIGN_4; @@ -85,8 +108,18 @@ vertical_alignment(const struct intel_mipmap_tree *mt) } static unsigned -horizontal_alignment(const struct intel_mipmap_tree *mt) +horizontal_alignment(const struct brw_context *brw, + const struct intel_mipmap_tree *mt, + uint32_t surf_type) { + /* On Gen9+ horizontal alignment is ignored when tr_mode is not + * TRMODE_NONE. + */ + if (brw->gen > 8 && + (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE || + gen9_use_linear_1d_layout(brw, mt))) + return 0; + switch (mt->align_w) { case 4: return GEN8_SURFACE_HALIGN_4; @@ -100,11 +133,11 @@ horizontal_alignment(const struct intel_mipmap_tree *mt) } static uint32_t * -allocate_surface_state(struct brw_context *brw, uint32_t *out_offset) +allocate_surface_state(struct brw_context *brw, uint32_t *out_offset, int index) { int dwords = brw->gen >= 9 ? 16 : 13; - uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - dwords * 4, 64, out_offset); + uint32_t *surf = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + dwords * 4, 64, index, out_offset); memset(surf, 0, dwords * 4); return surf; } @@ -120,7 +153,7 @@ gen8_emit_buffer_surface_state(struct brw_context *brw, bool rw) { const unsigned mocs = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; - uint32_t *surf = allocate_surface_state(brw, out_offset); + uint32_t *surf = allocate_surface_state(brw, out_offset, -1); surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | surface_format << BRW_SURFACE_FORMAT_SHIFT | @@ -164,7 +197,9 @@ gen8_emit_texture_surface_state(struct brw_context *brw, struct intel_mipmap_tree *aux_mt = NULL; uint32_t aux_mode = 0; uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; + int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; unsigned tiling_mode, pitch; + const unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode); if (mt->format == MESA_FORMAT_S_UINT8) { tiling_mode = GEN8_SURFACE_TILING_W; @@ -177,18 +212,29 @@ gen8_emit_texture_surface_state(struct brw_context *brw, if (mt->mcs_mt) { aux_mt = mt->mcs_mt; aux_mode = GEN8_SURFACE_AUX_MODE_MCS; + + /* + * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE): + * "When MCS is enabled for non-MSRT, HALIGN_16 must be used" + * + * From the hardware spec for GEN9: + * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN + * 16 must be used." 
+ */ + assert(brw->gen < 9 || mt->align_w == 16); + assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16); } - uint32_t *surf = allocate_surface_state(brw, surf_offset); + const uint32_t surf_type = translate_tex_target(target); + uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index); - surf[0] = translate_tex_target(target) << BRW_SURFACE_TYPE_SHIFT | + surf[0] = SET_FIELD(surf_type, BRW_SURFACE_TYPE) | format << BRW_SURFACE_FORMAT_SHIFT | - vertical_alignment(mt) | - horizontal_alignment(mt) | + vertical_alignment(brw, mt, surf_type) | + horizontal_alignment(brw, mt, surf_type) | tiling_mode; - if (target == GL_TEXTURE_CUBE_MAP || - target == GL_TEXTURE_CUBE_MAP_ARRAY) { + if (surf_type == BRW_SURFACE_CUBE) { surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES; } @@ -209,6 +255,12 @@ gen8_emit_texture_surface_state(struct brw_context *brw, surf[5] = SET_FIELD(min_level - mt->first_level, GEN7_SURFACE_MIN_LOD) | (max_level - min_level - 1); /* mip count */ + if (brw->gen >= 9) { + surf[5] |= SET_FIELD(tr_mode, GEN9_SURFACE_TRMODE); + /* Disable Mip Tail by setting a large value. */ + surf[5] |= SET_FIELD(15, GEN9_SURFACE_MIP_TAIL_START_LOD); + } + if (aux_mt) { surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) | SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) | @@ -310,7 +362,7 @@ gen8_emit_null_surface_state(struct brw_context *brw, unsigned samples, uint32_t *out_offset) { - uint32_t *surf = allocate_surface_state(brw, out_offset); + uint32_t *surf = allocate_surface_state(brw, out_offset, -1); surf[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT | @@ -339,6 +391,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, unsigned height = mt->logical_height0; unsigned pitch = mt->pitch; uint32_t tiling = mt->tiling; + unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode); uint32_t format = 0; uint32_t surf_type; uint32_t offset; @@ -390,15 +443,26 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, if (mt->mcs_mt) { aux_mt = mt->mcs_mt; aux_mode = GEN8_SURFACE_AUX_MODE_MCS; + + /* + * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE): + * "When MCS is enabled for non-MSRT, HALIGN_16 must be used" + * + * From the hardware spec for GEN9: + * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN + * 16 must be used." + */ + assert(brw->gen < 9 || mt->align_w == 16); + assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16); } - uint32_t *surf = allocate_surface_state(brw, &offset); + uint32_t *surf = allocate_surface_state(brw, &offset, surf_index); surf[0] = (surf_type << BRW_SURFACE_TYPE_SHIFT) | (is_array ? GEN7_SURFACE_IS_ARRAY : 0) | (format << BRW_SURFACE_FORMAT_SHIFT) | - vertical_alignment(mt) | - horizontal_alignment(mt) | + vertical_alignment(brw, mt, surf_type) | + horizontal_alignment(brw, mt, surf_type) | surface_tiling_mode(tiling); surf[1] = SET_FIELD(mocs, GEN8_SURFACE_MOCS) | mt->qpitch >> 2; @@ -417,6 +481,12 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, surf[5] = irb->mt_level - irb->mt->first_level; + if (brw->gen >= 9) { + surf[5] |= SET_FIELD(tr_mode, GEN9_SURFACE_TRMODE); + /* Disable Mip Tail by setting a large value. 
*/ + surf[5] |= SET_FIELD(15, GEN9_SURFACE_MIP_TAIL_START_LOD); + } + if (aux_mt) { surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) | SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) | diff --git a/src/mesa/drivers/dri/i965/gen8_viewport_state.c b/src/mesa/drivers/dri/i965/gen8_viewport_state.c index 322e4663b99..2d8eeb1f10f 100644 --- a/src/mesa/drivers/dri/i965/gen8_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen8_viewport_state.c @@ -26,6 +26,7 @@ #include "brw_defines.h" #include "intel_batchbuffer.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "main/viewport.h" static void @@ -33,6 +34,7 @@ gen8_upload_sf_clip_viewport(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; float y_scale, y_bias; + const float fb_height = (float)_mesa_geometric_height(ctx->DrawBuffer); const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); float *vp = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, @@ -47,7 +49,7 @@ gen8_upload_sf_clip_viewport(struct brw_context *brw) y_bias = 0; } else { y_scale = -1.0; - y_bias = ctx->DrawBuffer->Height; + y_bias = fb_height; } for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { @@ -116,8 +118,8 @@ gen8_upload_sf_clip_viewport(struct brw_context *brw) } else { vp[12] = ctx->ViewportArray[i].X; vp[13] = viewport_Xmax - 1; - vp[14] = ctx->DrawBuffer->Height - viewport_Ymax; - vp[15] = ctx->DrawBuffer->Height - ctx->ViewportArray[i].Y - 1; + vp[14] = fb_height - viewport_Ymax; + vp[15] = fb_height - ctx->ViewportArray[i].Y - 1; } vp += 16; diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c index f92af55e37f..28f5adddf14 100644 --- a/src/mesa/drivers/dri/i965/gen8_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c @@ -39,6 +39,9 @@ upload_vs_state(struct brw_context *brw) /* BRW_NEW_VS_PROG_DATA */ const struct brw_vue_prog_data *prog_data = &brw->vs.prog_data->base; + assert(prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 || + prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT); + if (prog_data->base.use_alt_mode) floating_point_mode = GEN6_VS_FLOATING_POINT_MODE_ALT; @@ -66,7 +69,8 @@ upload_vs_state(struct brw_context *brw) (prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); - uint32_t simd8_enable = prog_data->simd8 ? GEN8_VS_SIMD8_ENABLE : 0; + uint32_t simd8_enable = prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ? + GEN8_VS_SIMD8_ENABLE : 0; OUT_BATCH(((brw->max_vs_threads - 1) << HSW_VS_MAX_THREADS_SHIFT) | GEN6_VS_STATISTICS_ENABLE | simd8_enable | diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index e522e4e9c1d..ed659ed625e 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -743,27 +743,54 @@ intel_batchbuffer_emit_mi_flush(struct brw_context *brw) brw_render_cache_set_clear(brw); } -void -brw_load_register_mem(struct brw_context *brw, - uint32_t reg, - drm_intel_bo *bo, - uint32_t read_domains, uint32_t write_domain, - uint32_t offset) +static void +load_sized_register_mem(struct brw_context *brw, + uint32_t reg, + drm_intel_bo *bo, + uint32_t read_domains, uint32_t write_domain, + uint32_t offset, + int size) { + int i; + /* MI_LOAD_REGISTER_MEM only exists on Gen7+. 
*/ assert(brw->gen >= 7); if (brw->gen >= 8) { - BEGIN_BATCH(4); - OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (4 - 2)); - OUT_BATCH(reg); - OUT_RELOC64(bo, read_domains, write_domain, offset); + BEGIN_BATCH(4 * size); + for (i = 0; i < size; i++) { + OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (4 - 2)); + OUT_BATCH(reg + i * 4); + OUT_RELOC64(bo, read_domains, write_domain, offset + i * 4); + } ADVANCE_BATCH(); } else { - BEGIN_BATCH(3); - OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); - OUT_BATCH(reg); - OUT_RELOC(bo, read_domains, write_domain, offset); + BEGIN_BATCH(3 * size); + for (i = 0; i < size; i++) { + OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); + OUT_BATCH(reg + i * 4); + OUT_RELOC(bo, read_domains, write_domain, offset + i * 4); + } ADVANCE_BATCH(); } } + +void +brw_load_register_mem(struct brw_context *brw, + uint32_t reg, + drm_intel_bo *bo, + uint32_t read_domains, uint32_t write_domain, + uint32_t offset) +{ + load_sized_register_mem(brw, reg, bo, read_domains, write_domain, offset, 1); +} + +void +brw_load_register_mem64(struct brw_context *brw, + uint32_t reg, + drm_intel_bo *bo, + uint32_t read_domains, uint32_t write_domain, + uint32_t offset) +{ + load_sized_register_mem(brw, reg, bo, read_domains, write_domain, offset, 2); +} diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 7680a402975..d3ab769356c 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -77,13 +77,10 @@ br13_for_cpp(int cpp) switch (cpp) { case 4: return BR13_8888; - break; case 2: return BR13_565; - break; case 1: return BR13_8; - break; default: unreachable("not reached"); } @@ -130,6 +127,40 @@ set_blitter_tiling(struct brw_context *brw, ADVANCE_BATCH(); \ } while (0) +static int +blt_pitch(struct intel_mipmap_tree *mt) +{ + int pitch = mt->pitch; + if (mt->tiling) + pitch /= 4; + return pitch; +} + +bool +intel_miptree_blit_compatible_formats(mesa_format src, mesa_format dst) +{ + /* The BLT doesn't handle sRGB conversion */ + assert(src == _mesa_get_srgb_format_linear(src)); + assert(dst == _mesa_get_srgb_format_linear(dst)); + + /* No swizzle or format conversions possible, except... */ + if (src == dst) + return true; + + /* ...we can either discard the alpha channel when going from A->X, + * or we can fill the alpha channel with 0xff when going from X->A + */ + if (src == MESA_FORMAT_B8G8R8A8_UNORM || src == MESA_FORMAT_B8G8R8X8_UNORM) + return (dst == MESA_FORMAT_B8G8R8A8_UNORM || + dst == MESA_FORMAT_B8G8R8X8_UNORM); + + if (src == MESA_FORMAT_R8G8B8A8_UNORM || src == MESA_FORMAT_R8G8B8X8_UNORM) + return (dst == MESA_FORMAT_R8G8B8A8_UNORM || + dst == MESA_FORMAT_R8G8B8X8_UNORM); + + return false; +} + /** * Implements a rectangular block transfer (blit) of pixels between two * miptrees. @@ -172,11 +203,7 @@ intel_miptree_blit(struct brw_context *brw, * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A * channel to 1.0 at the end. 
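The new brw_load_register_mem64() wrapper above reuses load_sized_register_mem(), which simply emits one MI_LOAD_REGISTER_MEM per dword, advancing both the register address and the buffer offset by 4 each iteration. A standalone sketch of that loop, not part of the patch; emit_load_register_mem() is a stand-in for the batchbuffer macros, and the Gen7/Gen8 relocation-size difference is omitted:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the OUT_BATCH/OUT_RELOC(64) batchbuffer macros. */
static void emit_load_register_mem(uint32_t reg, uint32_t bo_offset)
{
   printf("MI_LOAD_REGISTER_MEM  reg 0x%04x <- bo+%u\n", reg, bo_offset);
}

/* A 64-bit register is loaded as two consecutive 32-bit loads, stepping the
 * register address and the buffer offset by 4 bytes per dword. */
static void load_sized_register_mem(uint32_t reg, uint32_t offset, int size)
{
   for (int i = 0; i < size; i++)
      emit_load_register_mem(reg + i * 4, offset + i * 4);
}

int main(void)
{
   load_sized_register_mem(0x2400, 0, 1);  /* brw_load_register_mem()   */
   load_sized_register_mem(0x2408, 8, 2);  /* brw_load_register_mem64() */
   return 0;
}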
*/ - if (src_format != dst_format && - ((src_format != MESA_FORMAT_B8G8R8A8_UNORM && - src_format != MESA_FORMAT_B8G8R8X8_UNORM) || - (dst_format != MESA_FORMAT_B8G8R8A8_UNORM && - dst_format != MESA_FORMAT_B8G8R8X8_UNORM))) { + if (!intel_miptree_blit_compatible_formats(src_format, dst_format)) { perf_debug("%s: Can't use hardware blitter from %s to %s, " "falling back.\n", __func__, _mesa_get_format_name(src_format), @@ -197,14 +224,14 @@ intel_miptree_blit(struct brw_context *brw, * * Furthermore, intelEmitCopyBlit (which is called below) uses a signed * 16-bit integer to represent buffer pitch, so it can only handle buffer - * pitches < 32k. + * pitches < 32k. However, the pitch is measured in bytes for linear buffers + * and dwords for tiled buffers. * * As a result of these two limitations, we can only use the blitter to do - * this copy when the miptree's pitch is less than 32k. + * this copy when the miptree's pitch is less than 32k linear or 128k tiled. */ - if (src_mt->pitch >= 32768 || - dst_mt->pitch >= 32768) { - perf_debug("Falling back due to >=32k pitch\n"); + if (blt_pitch(src_mt) >= 32768 || blt_pitch(dst_mt) >= 32768) { + perf_debug("Falling back due to >= 32k/128k pitch\n"); return false; } @@ -261,8 +288,9 @@ intel_miptree_blit(struct brw_context *brw, return false; } - if (src_mt->format == MESA_FORMAT_B8G8R8X8_UNORM && - dst_mt->format == MESA_FORMAT_B8G8R8A8_UNORM) { + /* XXX This could be done in a single pass using XY_FULL_MONO_PATTERN_BLT */ + if (_mesa_get_format_bits(src_format, GL_ALPHA_BITS) == 0 && + _mesa_get_format_bits(dst_format, GL_ALPHA_BITS) > 0) { intel_miptree_set_alpha_to_one(brw, dst_mt, dst_x, dst_y, width, height); diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h index f563939fdd9..2287c379c4e 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.h +++ b/src/mesa/drivers/dri/i965/intel_blit.h @@ -46,6 +46,8 @@ intelEmitCopyBlit(struct brw_context *brw, GLshort w, GLshort h, GLenum logicop ); +bool intel_miptree_blit_compatible_formats(mesa_format src, mesa_format dst); + bool intel_miptree_blit(struct brw_context *brw, struct intel_mipmap_tree *src_mt, int src_level, int src_slice, diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index 33a0348486d..75cf7854eff 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -88,25 +88,22 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage) } void -brw_process_intel_debug_variable(struct brw_context *brw) +brw_process_intel_debug_variable(struct intel_screen *screen) { uint64_t intel_debug = driParseDebugString(getenv("INTEL_DEBUG"), debug_control); (void) p_atomic_cmpxchg(&INTEL_DEBUG, 0, intel_debug); if (INTEL_DEBUG & DEBUG_BUFMGR) - dri_bufmgr_set_debug(brw->bufmgr, true); + dri_bufmgr_set_debug(screen->bufmgr, true); - if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && brw->gen < 7) { + if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && screen->devinfo->gen < 7) { fprintf(stderr, "shader_time debugging requires gen7 (Ivybridge) or better.\n"); INTEL_DEBUG &= ~DEBUG_SHADER_TIME; } - if (INTEL_DEBUG & DEBUG_PERF) - brw->perf_debug = true; - if (INTEL_DEBUG & DEBUG_AUB) - drm_intel_bufmgr_gem_set_aub_dump(brw->bufmgr, true); + drm_intel_bufmgr_gem_set_aub_dump(screen->bufmgr, true); } /** diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h index f754be20b1d..4689492e1fd 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.h +++ 
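The intel_blit.c changes above centralize two checks: blt_pitch() accounts for the BLT pitch field being measured in dwords for tiled buffers (hence the relaxed 128k tiled limit), and intel_miptree_blit_compatible_formats() permits only identical formats or alpha/no-alpha variants with the same channel order. A simplified standalone sketch of both, not part of the patch; the fmt enum is a stand-in for mesa_format:

#include <stdbool.h>
#include <stdio.h>

/* The BLT pitch is a signed 16-bit value: bytes for linear surfaces, dwords
 * for tiled ones, hence the divide-by-4 before the 32k comparison. */
static int blt_pitch(int pitch_bytes, bool tiled)
{
   return tiled ? pitch_bytes / 4 : pitch_bytes;
}

enum fmt { BGRA8, BGRX8, RGBA8, RGBX8, R16F };

static bool blit_compatible(enum fmt src, enum fmt dst)
{
   if (src == dst)
      return true;
   /* Only discarding or filling the alpha channel is allowed, and only
    * within the same channel order. */
   if (src == BGRA8 || src == BGRX8)
      return dst == BGRA8 || dst == BGRX8;
   if (src == RGBA8 || src == RGBX8)
      return dst == RGBA8 || dst == RGBX8;
   return false;
}

int main(void)
{
   printf("64KiB tiled row fits: %d\n", blt_pitch(65536, true) < 32768);
   printf("BGRX -> BGRA allowed: %d\n", blit_compatible(BGRX8, BGRA8));
   printf("BGRA -> RGBA allowed: %d\n", blit_compatible(BGRA8, RGBA8));
   return 0;
}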
b/src/mesa/drivers/dri/i965/intel_debug.h @@ -114,8 +114,8 @@ extern uint64_t INTEL_DEBUG; extern uint64_t intel_debug_flag_for_shader_stage(gl_shader_stage stage); -struct brw_context; +struct intel_screen; -extern void brw_process_intel_debug_variable(struct brw_context *brw); +extern void brw_process_intel_debug_variable(struct intel_screen *); extern bool brw_env_var_as_boolean(const char *var_name, bool default_value); diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index d6da34c7065..c99677c7197 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -323,9 +323,12 @@ intelInitExtensions(struct gl_context *ctx) } } + brw->predicate.supported = false; + if (brw->gen >= 7) { ctx->Extensions.ARB_conservative_depth = true; ctx->Extensions.ARB_derivative_control = true; + ctx->Extensions.ARB_framebuffer_no_attachments = true; ctx->Extensions.ARB_gpu_shader5 = true; ctx->Extensions.ARB_shader_atomic_counters = true; ctx->Extensions.ARB_texture_compression_bptc = true; @@ -337,6 +340,9 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_transform_feedback2 = true; ctx->Extensions.ARB_transform_feedback3 = true; ctx->Extensions.ARB_transform_feedback_instanced = true; + + if (brw->intelScreen->cmd_parser_version >= 2) + brw->predicate.supported = true; } /* Only enable this in core profile because other parts of Mesa behave diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index aebed723f75..1b3a72f3ec2 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -390,7 +390,7 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, image->height, 1, image->pitch, - true /*disable_aux_buffers*/); + MIPTREE_LAYOUT_DISABLE_AUX); if (!irb->mt) return; @@ -1027,10 +1027,9 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw, intel_image->base.Base.Level, intel_image->base.Base.Level, width, height, depth, - true, irb->mt->num_samples, INTEL_MIPTREE_TILING_ANY, - false); + MIPTREE_LAYOUT_ACCELERATED_UPLOAD); if (intel_miptree_wants_hiz_buffer(brw, new_mt)) { intel_miptree_alloc_hiz(brw, new_mt); diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 24a5c3dc666..6aa969a4930 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -158,15 +158,32 @@ intel_get_non_msrt_mcs_alignment(struct brw_context *brw, } } +bool +intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling) +{ + /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render + * Target(s)", beneath the "Fast Color Clear" bullet (p326): + * + * - Support is limited to tiled render targets. + * + * Gen9 changes the restriction to Y-tile only. + */ + if (brw->gen >= 9) + return tiling == I915_TILING_Y; + else if (brw->gen >= 7) + return tiling != I915_TILING_NONE; + else + return false; +} /** * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer - * can be used. + * can be used. This doesn't (and should not) inspect any of the properties of + * the miptree's BO. * * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)", * beneath the "Fast Color Clear" bullet (p326): * - * - Support is limited to tiled render targets. * - Support is for non-mip-mapped and non-array surface types only. 
* * And then later, on p327: @@ -175,8 +192,8 @@ intel_get_non_msrt_mcs_alignment(struct brw_context *brw, * 64bpp, and 128bpp. */ bool -intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw, - struct intel_mipmap_tree *mt) +intel_miptree_is_fast_clear_capable(struct brw_context *brw, + struct intel_mipmap_tree *mt) { /* MCS support does not exist prior to Gen7 */ if (brw->gen < 7) @@ -193,15 +210,25 @@ intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw, return false; } - if (mt->tiling != I915_TILING_X && - mt->tiling != I915_TILING_Y) - return false; if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16) return false; - if (mt->first_level != 0 || mt->last_level != 0) + if (mt->first_level != 0 || mt->last_level != 0) { + if (brw->gen >= 8) { + perf_debug("Multi-LOD fast clear - giving up (%dx%dx%d).\n", + mt->logical_width0, mt->logical_height0, mt->last_level); + } + return false; - if (mt->physical_depth0 != 1) + } + if (mt->physical_depth0 != 1) { + if (brw->gen >= 8) { + perf_debug("Layered fast clear - giving up. (%dx%d%d)\n", + mt->logical_width0, mt->logical_height0, + mt->physical_depth0); + } + return false; + } /* There's no point in using an MCS buffer if the surface isn't in a * renderable format. @@ -244,10 +271,9 @@ intel_miptree_create_layout(struct brw_context *brw, GLuint width0, GLuint height0, GLuint depth0, - bool for_bo, GLuint num_samples, - bool force_all_slices_at_each_lod, - bool disable_aux_buffers) + enum intel_miptree_tiling_mode requested, + uint32_t layout_flags) { struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); if (!mt) @@ -286,7 +312,7 @@ intel_miptree_create_layout(struct brw_context *brw, mt->logical_height0 = height0; mt->logical_depth0 = depth0; mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; - mt->disable_aux_buffers = disable_aux_buffers; + mt->disable_aux_buffers = (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) != 0; exec_list_make_empty(&mt->hiz_map); /* The cpp is bytes per (1, blockheight)-sized block for compressed @@ -422,12 +448,15 @@ intel_miptree_create_layout(struct brw_context *brw, mt->physical_height0 = height0; mt->physical_depth0 = depth0; - if (!for_bo && + if (!(layout_flags & MIPTREE_LAYOUT_FOR_BO) && _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL && (brw->must_use_separate_stencil || (brw->has_separate_stencil && intel_miptree_wants_hiz_buffer(brw, mt)))) { - const bool force_all_slices_at_each_lod = brw->gen == 6; + uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD; + if (brw->gen == 6) + stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD; + mt->stencil_mt = intel_miptree_create(brw, mt->target, MESA_FORMAT_S_UINT8, @@ -436,10 +465,10 @@ intel_miptree_create_layout(struct brw_context *brw, mt->logical_width0, mt->logical_height0, mt->logical_depth0, - true, num_samples, INTEL_MIPTREE_TILING_ANY, - force_all_slices_at_each_lod); + stencil_flags); + if (!mt->stencil_mt) { intel_miptree_release(&mt); return NULL; @@ -457,119 +486,36 @@ intel_miptree_create_layout(struct brw_context *brw, } } - if (force_all_slices_at_each_lod) + if (layout_flags & MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD) mt->array_layout = ALL_SLICES_AT_EACH_LOD; - brw_miptree_layout(brw, mt); - - if (mt->disable_aux_buffers) - assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS); - - return mt; -} - -/** - * \brief Helper function for intel_miptree_create(). 
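The old intel_is_non_msrt_mcs_buffer_supported() is split above into a pure tiling check, intel_tiling_supports_non_msrt_mcs(), and the per-miptree intel_miptree_is_fast_clear_capable(). A minimal sketch of the tiling half, not part of the patch; the tiling enum stands in for the I915_TILING_* values:

#include <stdbool.h>
#include <stdio.h>

enum tiling { TILING_NONE, TILING_X, TILING_Y };

/* Gen9 restricts single-sampled MCS (fast clear) to Y tiling; Gen7/8 merely
 * require the render target to be tiled at all. */
static bool tiling_supports_non_msrt_mcs(int gen, enum tiling t)
{
   if (gen >= 9)
      return t == TILING_Y;
   else if (gen >= 7)
      return t != TILING_NONE;
   else
      return false;
}

int main(void)
{
   printf("gen8, X-tiled: %d\n", tiling_supports_non_msrt_mcs(8, TILING_X));
   printf("gen9, X-tiled: %d\n", tiling_supports_non_msrt_mcs(9, TILING_X));
   return 0;
}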
- */ -static uint32_t -intel_miptree_choose_tiling(struct brw_context *brw, - mesa_format format, - uint32_t width0, - uint32_t num_samples, - enum intel_miptree_tiling_mode requested, - struct intel_mipmap_tree *mt) -{ - if (format == MESA_FORMAT_S_UINT8) { - /* The stencil buffer is W tiled. However, we request from the kernel a - * non-tiled buffer because the GTT is incapable of W fencing. - */ - return I915_TILING_NONE; - } - - /* Some usages may want only one type of tiling, like depth miptrees (Y - * tiled), or temporary BOs for uploading data once (linear). - */ - switch (requested) { - case INTEL_MIPTREE_TILING_ANY: - break; - case INTEL_MIPTREE_TILING_Y: - return I915_TILING_Y; - case INTEL_MIPTREE_TILING_NONE: - return I915_TILING_NONE; - } - - if (num_samples > 1) { - /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled - * Surface"): - * - * [DevSNB+]: For multi-sample render targets, this field must be - * 1. MSRTs can only be tiled. - * - * Our usual reason for preferring X tiling (fast blits using the - * blitting engine) doesn't apply to MSAA, since we'll generally be - * downsampling or upsampling when blitting between the MSAA buffer - * and another buffer, and the blitting engine doesn't support that. - * So use Y tiling, since it makes better use of the cache. - */ - return I915_TILING_Y; - } - - GLenum base_format = _mesa_get_format_base_format(format); - if (base_format == GL_DEPTH_COMPONENT || - base_format == GL_DEPTH_STENCIL_EXT) - return I915_TILING_Y; - - /* 1D textures (and 1D array textures) don't get any benefit from tiling, - * in fact it leads to a less efficient use of memory space and bandwidth - * due to tile alignment. + /* + * Obey HALIGN_16 constraints for Gen8 and Gen9 buffers which are + * multisampled or have an AUX buffer attached to it. + * + * GEN | MSRT | AUX_CCS_* or AUX_MCS + * ------------------------------------------- + * 9 | HALIGN_16 | HALIGN_16 + * 8 | HALIGN_ANY | HALIGN_16 + * 7 | ? | ? + * 6 | ? | ? */ - if (mt->logical_height0 == 1) - return I915_TILING_NONE; - - int minimum_pitch = mt->total_width * mt->cpp; - - /* If the width is much smaller than a tile, don't bother tiling. */ - if (minimum_pitch < 64) - return I915_TILING_NONE; - - if (ALIGN(minimum_pitch, 512) >= 32768 || - mt->total_width >= 32768 || mt->total_height >= 32768) { - perf_debug("%dx%d miptree too large to blit, falling back to untiled", - mt->total_width, mt->total_height); - return I915_TILING_NONE; + if (intel_miptree_is_fast_clear_capable(brw, mt)) { + if (brw->gen >= 9 || (brw->gen == 8 && num_samples <= 1)) + layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; + } else if (brw->gen >= 9 && num_samples > 1) { + layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; + } else { + /* For now, nothing else has this requirement */ + assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0); } - /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */ - if (brw->gen < 6) - return I915_TILING_X; + brw_miptree_layout(brw, mt, requested, layout_flags); - /* From the Sandybridge PRM, Volume 1, Part 2, page 32: - * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX - * or Linear." - * 128 bits per pixel translates to 16 bytes per pixel. This is necessary - * all the way back to 965, but is permitted on Gen7+. 
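The table in the new comment above drives the HALIGN_16 decision in intel_miptree_create_layout(). Here is that predicate condensed into a standalone function, not part of the patch, merging the fast-clear-capable and MSAA branches from the hunk above:

#include <stdbool.h>
#include <stdio.h>

/* Gen9 wants HALIGN_16 whenever an AUX (CCS/MCS) surface may be attached or
 * the surface is multisampled; Gen8 only needs it for the single-sampled
 * fast-clear case. */
static bool needs_halign16(int gen, int num_samples, bool fast_clear_capable)
{
   if (fast_clear_capable)
      return gen >= 9 || (gen == 8 && num_samples <= 1);
   return gen >= 9 && num_samples > 1;
}

int main(void)
{
   printf("gen8, 4x MSAA, fast-clear capable: %d\n", needs_halign16(8, 4, true));
   printf("gen8, 1x,      fast-clear capable: %d\n", needs_halign16(8, 1, true));
   printf("gen9, 4x MSAA, not capable:        %d\n", needs_halign16(9, 4, false));
   return 0;
}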
- */ - if (brw->gen < 7 && mt->cpp >= 16) - return I915_TILING_X; - - /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most - * messages), on p64, under the heading "Surface Vertical Alignment": - * - * This field must be set to VALIGN_4 for all tiled Y Render Target - * surfaces. - * - * So if the surface is renderable and uses a vertical alignment of 2, - * force it to be X tiled. This is somewhat conservative (it's possible - * that the client won't ever render to this surface), but it's difficult - * to know that ahead of time. And besides, since we use a vertical - * alignment of 4 as often as we can, this shouldn't happen very often. - */ - if (brw->gen == 7 && mt->align_h == 2 && - brw->format_supported_as_render_target[format]) { - return I915_TILING_X; - } + if (mt->disable_aux_buffers) + assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS); - return I915_TILING_Y | I915_TILING_X; + return mt; } @@ -615,33 +561,33 @@ intel_lower_compressed_format(struct brw_context *brw, mesa_format format) struct intel_mipmap_tree * intel_miptree_create(struct brw_context *brw, - GLenum target, - mesa_format format, - GLuint first_level, - GLuint last_level, - GLuint width0, - GLuint height0, - GLuint depth0, - bool expect_accelerated_upload, + GLenum target, + mesa_format format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, GLuint num_samples, enum intel_miptree_tiling_mode requested_tiling, - bool force_all_slices_at_each_lod) + uint32_t layout_flags) { struct intel_mipmap_tree *mt; mesa_format tex_format = format; mesa_format etc_format = MESA_FORMAT_NONE; GLuint total_width, total_height; + uint32_t alloc_flags = 0; format = intel_lower_compressed_format(brw, format); etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE; + assert((layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) == 0); + assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0); mt = intel_miptree_create_layout(brw, target, format, - first_level, last_level, width0, - height0, depth0, - false, num_samples, - force_all_slices_at_each_lod, - false /*disable_aux_buffers*/); + first_level, last_level, width0, + height0, depth0, num_samples, + requested_tiling, layout_flags); /* * pitch == 0 || height == 0 indicates the null texture */ @@ -659,25 +605,21 @@ intel_miptree_create(struct brw_context *brw, total_height = ALIGN(total_height, 64); } - uint32_t tiling = intel_miptree_choose_tiling(brw, format, width0, - num_samples, requested_tiling, - mt); bool y_or_x = false; - if (tiling == (I915_TILING_Y | I915_TILING_X)) { + if (mt->tiling == (I915_TILING_Y | I915_TILING_X)) { y_or_x = true; mt->tiling = I915_TILING_Y; - } else { - mt->tiling = tiling; } + if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD) + alloc_flags |= BO_ALLOC_FOR_RENDER; + unsigned long pitch; + mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", total_width, + total_height, mt->cpp, &mt->tiling, + &pitch, alloc_flags); mt->etc_format = etc_format; - mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", - total_width, total_height, mt->cpp, - &mt->tiling, &pitch, - (expect_accelerated_upload ? - BO_ALLOC_FOR_RENDER : 0)); mt->pitch = pitch; /* If the BO is too large to fit in the aperture, we need to use the @@ -691,10 +633,8 @@ intel_miptree_create(struct brw_context *brw, mt->tiling = I915_TILING_X; drm_intel_bo_unreference(mt->bo); mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", - total_width, total_height, mt->cpp, - &mt->tiling, &pitch, - (expect_accelerated_upload ? 
- BO_ALLOC_FOR_RENDER : 0)); + total_width, total_height, mt->cpp, + &mt->tiling, &pitch, alloc_flags); mt->pitch = pitch; } @@ -707,6 +647,7 @@ intel_miptree_create(struct brw_context *brw, if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) { + assert(mt->num_samples > 1); if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) { intel_miptree_release(&mt); return NULL; @@ -718,8 +659,11 @@ intel_miptree_create(struct brw_context *brw, * Allocation of the MCS miptree will be deferred until the first fast * clear actually occurs. */ - if (intel_is_non_msrt_mcs_buffer_supported(brw, mt)) + if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) && + intel_miptree_is_fast_clear_capable(brw, mt)) { mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; + assert(brw->gen < 8 || mt->align_w == 16 || num_samples <= 1); + } return mt; } @@ -733,7 +677,7 @@ intel_miptree_create_for_bo(struct brw_context *brw, uint32_t height, uint32_t depth, int pitch, - bool disable_aux_buffers) + uint32_t layout_flags) { struct intel_mipmap_tree *mt; uint32_t tiling, swizzle; @@ -754,11 +698,18 @@ intel_miptree_create_for_bo(struct brw_context *brw, target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D; + /* 'requested' parameter of intel_miptree_create_layout() is relevant + * only for non bo miptree. Tiling for bo is already computed above. + * So, the tiling requested (INTEL_MIPTREE_TILING_ANY) below is + * just a place holder and will not make any change to the miptree + * tiling format. + */ + layout_flags |= MIPTREE_LAYOUT_FOR_BO; mt = intel_miptree_create_layout(brw, target, format, 0, 0, - width, height, depth, - true, 0, false, - disable_aux_buffers); + width, height, depth, 0, + INTEL_MIPTREE_TILING_ANY, + layout_flags); if (!mt) return NULL; @@ -808,7 +759,7 @@ intel_update_winsys_renderbuffer_miptree(struct brw_context *intel, height, 1, pitch, - false); + 0); if (!singlesample_mt) goto fail; @@ -817,7 +768,8 @@ intel_update_winsys_renderbuffer_miptree(struct brw_context *intel, * Allocation of the MCS miptree will be deferred until the first fast * clear actually occurs. */ - if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt)) + if (intel_tiling_supports_non_msrt_mcs(intel, singlesample_mt->tiling) && + intel_miptree_is_fast_clear_capable(intel, singlesample_mt)) singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; if (num_samples == 0) { @@ -866,8 +818,9 @@ intel_miptree_create_for_renderbuffer(struct brw_context *brw, GLenum target = num_samples > 1 ? 
GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; mt = intel_miptree_create(brw, target, format, 0, 0, - width, height, depth, true, num_samples, - INTEL_MIPTREE_TILING_ANY, false); + width, height, depth, num_samples, + INTEL_MIPTREE_TILING_ANY, + MIPTREE_LAYOUT_ACCELERATED_UPLOAD); if (!mt) goto fail; @@ -1258,8 +1211,10 @@ intel_miptree_copy_slice(struct brw_context *brw, assert(src_mt->format == dst_mt->format); if (dst_mt->compressed) { - height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h; - width = ALIGN(width, dst_mt->align_w); + unsigned int i, j; + _mesa_get_format_block_size(dst_mt->format, &i, &j); + height = ALIGN(height, j) / j; + width = ALIGN(width, i); } /* If it's a packed depth/stencil buffer with separate stencil, the blit @@ -1378,10 +1333,9 @@ intel_miptree_alloc_mcs(struct brw_context *brw, mt->logical_width0, mt->logical_height0, mt->logical_depth0, - true, 0 /* num_samples */, INTEL_MIPTREE_TILING_Y, - false); + MIPTREE_LAYOUT_ACCELERATED_UPLOAD); /* From the Ivy Bridge PRM, Vol 2 Part 1 p326: * @@ -1429,6 +1383,9 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, unsigned mcs_height = ALIGN(mt->logical_height0, height_divisor) / height_divisor; assert(mt->logical_depth0 == 1); + uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD; + if (brw->gen >= 8) + layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; mt->mcs_mt = intel_miptree_create(brw, mt->target, format, @@ -1437,10 +1394,9 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, mcs_width, mcs_height, mt->logical_depth0, - true, 0 /* num_samples */, INTEL_MIPTREE_TILING_Y, - false); + layout_flags); return mt->mcs_mt; } @@ -1682,7 +1638,10 @@ intel_hiz_miptree_buf_create(struct brw_context *brw, struct intel_mipmap_tree *mt) { struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1); - const bool force_all_slices_at_each_lod = brw->gen == 6; + uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD; + + if (brw->gen == 6) + layout_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD; if (!buf) return NULL; @@ -1695,10 +1654,9 @@ intel_hiz_miptree_buf_create(struct brw_context *brw, mt->logical_width0, mt->logical_height0, mt->logical_depth0, - true, mt->num_samples, INTEL_MIPTREE_TILING_ANY, - force_all_slices_at_each_lod); + layout_flags); if (!buf->mt) { free(buf); return NULL; @@ -2128,9 +2086,8 @@ intel_miptree_map_blit(struct brw_context *brw, map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format, 0, 0, map->w, map->h, 1, - false, 0, - INTEL_MIPTREE_TILING_NONE, - false); + 0, INTEL_MIPTREE_TILING_NONE, 0); + if (!map->mt) { fprintf(stderr, "Failed to allocate blit temporary\n"); goto fail; @@ -2675,7 +2632,9 @@ intel_miptree_map(struct brw_context *brw, } else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) { intel_miptree_map_blit(brw, mt, map, level, slice); #if defined(USE_SSE41) - } else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed && cpu_has_sse4_1) { + } else if (!(mode & GL_MAP_WRITE_BIT) && + !mt->compressed && cpu_has_sse4_1 && + (mt->pitch % 16 == 0)) { intel_miptree_map_movntdqa(brw, mt, map, level, slice); #endif } else { diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 8b42e4adb79..bde6daa4e2d 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -330,6 +330,13 @@ struct intel_miptree_aux_buffer struct intel_mipmap_tree *mt; /**< hiz miptree used with Gen6 */ }; +/* Tile resource modes */ +enum intel_miptree_tr_mode { + 
INTEL_MIPTREE_TRMODE_NONE, + INTEL_MIPTREE_TRMODE_YF, + INTEL_MIPTREE_TRMODE_YS +}; + struct intel_mipmap_tree { /** Buffer object containing the pixel data. */ @@ -338,6 +345,7 @@ struct intel_mipmap_tree uint32_t pitch; /**< pitch in bytes. */ uint32_t tiling; /**< One of the I915_TILING_* flags */ + enum intel_miptree_tr_mode tr_mode; /* Effectively the key: */ @@ -514,19 +522,27 @@ enum intel_miptree_tiling_mode { INTEL_MIPTREE_TILING_NONE, }; -bool -intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw, - struct intel_mipmap_tree *mt); - void intel_get_non_msrt_mcs_alignment(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned *width_px, unsigned *height); - +bool +intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling); +bool +intel_miptree_is_fast_clear_capable(struct brw_context *brw, + struct intel_mipmap_tree *mt); bool intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, struct intel_mipmap_tree *mt); +enum { + MIPTREE_LAYOUT_ACCELERATED_UPLOAD = 1 << 0, + MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD = 1 << 1, + MIPTREE_LAYOUT_FOR_BO = 1 << 2, + MIPTREE_LAYOUT_DISABLE_AUX = 1 << 3, + MIPTREE_LAYOUT_FORCE_HALIGN16 = 1 << 4, +}; + struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw, GLenum target, mesa_format format, @@ -535,10 +551,9 @@ struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw, GLuint width0, GLuint height0, GLuint depth0, - bool expect_accelerated_upload, GLuint num_samples, enum intel_miptree_tiling_mode, - bool force_all_slices_at_each_lod); + uint32_t flags); struct intel_mipmap_tree * intel_miptree_create_for_bo(struct brw_context *brw, @@ -549,7 +564,7 @@ intel_miptree_create_for_bo(struct brw_context *brw, uint32_t height, uint32_t depth, int pitch, - bool disable_aux_buffers); + uint32_t layout_flags); void intel_update_winsys_renderbuffer_miptree(struct brw_context *intel, @@ -753,7 +768,11 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw, const struct intel_mipmap_tree *mt, unsigned level); -void brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt); +void +brw_miptree_layout(struct brw_context *brw, + struct intel_mipmap_tree *mt, + enum intel_miptree_tiling_mode requested, + uint32_t layout_flags); void *intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt); diff --git a/src/mesa/drivers/dri/i965/intel_pixel_draw.c b/src/mesa/drivers/dri/i965/intel_pixel_draw.c index 4ecefc8cf54..6c6bd8629ac 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_draw.c @@ -28,6 +28,7 @@ #include "main/glheader.h" #include "main/enums.h" #include "main/image.h" +#include "main/glformats.h" #include "main/mtypes.h" #include "main/condrender.h" #include "main/fbobject.h" @@ -76,8 +77,16 @@ do_blit_drawpixels(struct gl_context * ctx, struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; struct intel_renderbuffer *irb = intel_renderbuffer(rb); - if (!_mesa_format_matches_format_and_type(irb->mt->format, format, type, - false)) { + mesa_format src_format = _mesa_format_from_format_and_type(format, type); + if (_mesa_format_is_mesa_array_format(src_format)) + src_format = _mesa_format_from_array_format(src_format); + mesa_format dst_format = irb->mt->format; + + /* We can safely discard sRGB encode/decode for the DrawPixels interface */ + src_format = _mesa_get_srgb_format_linear(src_format); + dst_format = _mesa_get_srgb_format_linear(dst_format); + + if 
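The intel_mipmap_tree.h changes above replace the trailing bool parameters of intel_miptree_create() and friends with a single MIPTREE_LAYOUT_* flags word. An illustrative sketch, not part of the patch; create_miptree() is a hypothetical stub and only the flag values are taken from the diff:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Flag values copied from the new MIPTREE_LAYOUT_* enum. */
enum {
   MIPTREE_LAYOUT_ACCELERATED_UPLOAD     = 1 << 0,
   MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD = 1 << 1,
   MIPTREE_LAYOUT_FOR_BO                 = 1 << 2,
   MIPTREE_LAYOUT_DISABLE_AUX            = 1 << 3,
   MIPTREE_LAYOUT_FORCE_HALIGN16         = 1 << 4,
};

/* Hypothetical stub standing in for intel_miptree_create(): it only shows how
 * the former bool parameters are now recovered from one flags word. */
static void create_miptree(uint32_t layout_flags)
{
   bool accelerated = (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD) != 0;
   bool disable_aux = (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) != 0;
   printf("accelerated=%d disable_aux=%d\n", accelerated, disable_aux);
}

int main(void)
{
   /* One flags argument replaces the old expect_accelerated_upload,
    * force_all_slices_at_each_lod and disable_aux_buffers booleans. */
   create_miptree(MIPTREE_LAYOUT_ACCELERATED_UPLOAD | MIPTREE_LAYOUT_FORCE_HALIGN16);
   return 0;
}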
(!intel_miptree_blit_compatible_formats(src_format, dst_format)) { DBG("%s: bad format for blit\n", __func__); return false; } @@ -112,7 +121,7 @@ do_blit_drawpixels(struct gl_context * ctx, src_offset, width, height, 1, src_stride, - false /*disable_aux_buffers*/); + 0); if (!pbo_mt) return false; diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index d3ca38b6ecd..30380570d62 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -226,8 +226,30 @@ intelReadPixels(struct gl_context * ctx, if (_mesa_is_bufferobj(pack->BufferObj)) { if (_mesa_meta_pbo_GetTexSubImage(ctx, 2, NULL, x, y, 0, width, height, 1, - format, type, pixels, pack)) + format, type, pixels, pack)) { + /* _mesa_meta_pbo_GetTexSubImage() implements PBO transfers by + * binding the user-provided BO as a fake framebuffer and rendering + * to it. This breaks the invariant of the GL that nothing is able + * to render to a BO, causing nondeterministic corruption issues + * because the render cache is not coherent with a number of other + * caches that the BO could potentially be bound to afterwards. + * + * This could be solved in the same way that we guarantee texture + * coherency after a texture is attached to a framebuffer and + * rendered to, but that would involve checking *all* BOs bound to + * the pipeline for the case we need to emit a cache flush due to + * previous rendering to any of them -- Including vertex, index, + * uniform, atomic counter, shader image, transform feedback, + * indirect draw buffers, etc. + * + * That would increase the per-draw call overhead even though it's + * very unlikely that any of the BOs bound to the pipeline has been + * rendered to via a PBO at any point, so it seems better to just + * flush here unconditionally. + */ + intel_batchbuffer_emit_mi_flush(brw); return; + } perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__); } diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h index 488fb5b98f8..bd14e189da3 100644 --- a/src/mesa/drivers/dri/i965/intel_reg.h +++ b/src/mesa/drivers/dri/i965/intel_reg.h @@ -48,6 +48,20 @@ #define GEN7_MI_LOAD_REGISTER_MEM (CMD_MI | (0x29 << 23)) # define MI_LOAD_REGISTER_MEM_USE_GGTT (1 << 22) +/* Manipulate the predicate bit based on some register values. 
Only on Gen7+ */ +#define GEN7_MI_PREDICATE (CMD_MI | (0xC << 23)) +# define MI_PREDICATE_LOADOP_KEEP (0 << 6) +# define MI_PREDICATE_LOADOP_LOAD (2 << 6) +# define MI_PREDICATE_LOADOP_LOADINV (3 << 6) +# define MI_PREDICATE_COMBINEOP_SET (0 << 3) +# define MI_PREDICATE_COMBINEOP_AND (1 << 3) +# define MI_PREDICATE_COMBINEOP_OR (2 << 3) +# define MI_PREDICATE_COMBINEOP_XOR (3 << 3) +# define MI_PREDICATE_COMPAREOP_TRUE (0 << 0) +# define MI_PREDICATE_COMPAREOP_FALSE (1 << 0) +# define MI_PREDICATE_COMPAREOP_SRCS_EQUAL (2 << 0) +# define MI_PREDICATE_COMPAREOP_DELTAS_EQUAL (3 << 0) + /** @{ * * PIPE_CONTROL operation, a combination MI_FLUSH and register write with @@ -69,6 +83,7 @@ #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1 << 10) /* GM45+ only */ #define PIPE_CONTROL_ISP_DIS (1 << 9) #define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8) +#define PIPE_CONTROL_FLUSH_ENABLE (1 << 7) /* Gen7+ only */ /* GT */ #define PIPE_CONTROL_DATA_CACHE_INVALIDATE (1 << 5) #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4) @@ -147,3 +162,11 @@ # define GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC (1 << 1) # define GEN8_HIZ_PMA_MASK_BITS \ ((GEN8_HIZ_NP_PMA_FIX_ENABLE | GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE) << 16) + +/* Predicate registers */ +#define MI_PREDICATE_SRC0 0x2400 +#define MI_PREDICATE_SRC1 0x2408 +#define MI_PREDICATE_DATA 0x2410 +#define MI_PREDICATE_RESULT 0x2418 +#define MI_PREDICATE_RESULT_1 0x241C +#define MI_PREDICATE_RESULT_2 0x2214 diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 4860a160ee9..de14696bd76 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -39,6 +39,7 @@ #include "swrast/s_renderbuffer.h" #include "util/ralloc.h" #include "brw_shader.h" +#include "glsl/nir/nir.h" #include "utils.h" #include "xmlpool.h" @@ -1372,6 +1373,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) if (!intelScreen->devinfo) return false; + brw_process_intel_debug_variable(intelScreen); + intelScreen->hw_must_use_separate_stencil = intelScreen->devinfo->gen >= 7; intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen); @@ -1407,6 +1410,13 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) (ret != -1 || errno != EINVAL); } + struct drm_i915_getparam getparam; + getparam.param = I915_PARAM_CMD_PARSER_VERSION; + getparam.value = &intelScreen->cmd_parser_version; + const int ret = drmIoctl(psp->fd, DRM_IOCTL_I915_GETPARAM, &getparam); + if (ret == -1) + intelScreen->cmd_parser_version = 0; + psp->extensions = !intelScreen->has_context_reset_notification ? 
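The intel_reg.h additions above define the MI_PREDICATE command fields and the MI_PREDICATE_SRC0/SRC1/RESULT registers that the new brw->predicate.supported path relies on. A rough CPU-side illustration of the SRCS_EQUAL comparison those defines enable, not part of the patch; the real evaluation is performed by the MI_PREDICATE command in the GPU's command streamer:

#include <stdint.h>
#include <stdio.h>

/* Register offsets copied from the new intel_reg.h defines. */
#define MI_PREDICATE_SRC0    0x2400
#define MI_PREDICATE_SRC1    0x2408
#define MI_PREDICATE_RESULT  0x2418

/* CPU-side mimic of the COMPAREOP_SRCS_EQUAL result. */
static int predicate_srcs_equal(uint64_t src0, uint64_t src1)
{
   return src0 == src1;
}

int main(void)
{
   uint64_t samples_passed = 0;   /* e.g. an occlusion query write-back */
   printf("predicate set: %d\n", predicate_srcs_equal(samples_passed, 0));
   return 0;
}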
intelScreenExtensions : intelRobustScreenExtensions; diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index e7a14903d6e..742b3d30eee 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -72,7 +72,13 @@ struct intel_screen * Configuration cache with default values for all contexts */ driOptionCache optionCache; -}; + + /** + * Version of the command parser reported by the + * I915_PARAM_CMD_PARSER_VERSION parameter + */ + int cmd_parser_version; + }; extern void intelDestroyContext(__DRIcontext * driContextPriv); diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c index 777a682ad21..b0181ad1d75 100644 --- a/src/mesa/drivers/dri/i965/intel_tex.c +++ b/src/mesa/drivers/dri/i965/intel_tex.c @@ -93,7 +93,7 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx, } else { intel_image->mt = intel_miptree_create_for_teximage(brw, intel_texobj, intel_image, - false); + 0); /* Even if the object currently has a mipmap tree associated * with it, this one is a more likely candidate to represent the @@ -144,10 +144,8 @@ intel_alloc_texture_storage(struct gl_context *ctx, first_image->TexFormat, 0, levels - 1, width, height, depth, - false, /* expect_accelerated */ num_samples, - INTEL_MIPTREE_TILING_ANY, - false); + INTEL_MIPTREE_TILING_ANY, 0); if (intel_texobj->mt == NULL) { return false; @@ -341,7 +339,7 @@ intel_set_texture_storage_for_buffer_object(struct gl_context *ctx, buffer_offset, image->Width, image->Height, image->Depth, row_stride, - false /*disable_aux_buffers*/); + 0); if (!intel_texobj->mt) return false; diff --git a/src/mesa/drivers/dri/i965/intel_tex.h b/src/mesa/drivers/dri/i965/intel_tex.h index f048e846d55..402a3891ecd 100644 --- a/src/mesa/drivers/dri/i965/intel_tex.h +++ b/src/mesa/drivers/dri/i965/intel_tex.h @@ -53,7 +53,7 @@ struct intel_mipmap_tree * intel_miptree_create_for_teximage(struct brw_context *brw, struct intel_texture_object *intelObj, struct intel_texture_image *intelImage, - bool expect_accelerated_upload); + uint32_t layout_flags); GLuint intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit); diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index 7952ee5ad88..ebe84b664d4 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -36,7 +36,7 @@ struct intel_mipmap_tree * intel_miptree_create_for_teximage(struct brw_context *brw, struct intel_texture_object *intelObj, struct intel_texture_image *intelImage, - bool expect_accelerated_upload) + uint32_t layout_flags) { GLuint lastLevel; int width, height, depth; @@ -79,10 +79,9 @@ intel_miptree_create_for_teximage(struct brw_context *brw, width, height, depth, - expect_accelerated_upload, intelImage->base.Base.NumSamples, INTEL_MIPTREE_TILING_ANY, - false); + layout_flags); } static void @@ -155,7 +154,7 @@ intel_set_texture_image_bo(struct gl_context *ctx, GLuint width, GLuint height, GLuint pitch, GLuint tile_x, GLuint tile_y, - bool disable_aux_buffers) + uint32_t layout_flags) { struct brw_context *brw = brw_context(ctx); struct intel_texture_image *intel_image = intel_texture_image(image); @@ -171,7 +170,7 @@ intel_set_texture_image_bo(struct gl_context *ctx, intel_image->mt = intel_miptree_create_for_bo(brw, bo, image->TexFormat, 0, width, height, 1, pitch, - disable_aux_buffers); + layout_flags); if (intel_image->mt == NULL) return; intel_image->mt->target 
= target; @@ -255,8 +254,7 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, rb->Base.Base.Width, rb->Base.Base.Height, rb->mt->pitch, - 0, 0, - false /*disable_aux_buffers*/); + 0, 0, 0); _mesa_unlock_texture(&brw->ctx, texObj); } @@ -349,7 +347,7 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, image->width, image->height, image->pitch, image->tile_x, image->tile_y, - true /*disable_aux_buffers*/); + MIPTREE_LAYOUT_DISABLE_AUX); } /** @@ -486,8 +484,15 @@ intel_get_tex_image(struct gl_context *ctx, if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage, 0, 0, 0, texImage->Width, texImage->Height, texImage->Depth, format, type, - pixels, &ctx->Pack)) + pixels, &ctx->Pack)) { + /* Flush to guarantee coherency between the render cache and other + * caches the PBO could potentially be bound to after this point. + * See the related comment in intelReadPixels() for a more detailed + * explanation. + */ + intel_batchbuffer_emit_mi_flush(brw); return; + } perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__); } diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c index 1d827683b99..4991c2997ef 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_validate.c +++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c @@ -47,8 +47,10 @@ intel_update_max_level(struct intel_texture_object *intelObj, { struct gl_texture_object *tObj = &intelObj->base; - if (sampler->MinFilter == GL_NEAREST || - sampler->MinFilter == GL_LINEAR) { + if (!tObj->_MipmapComplete || + (tObj->_RenderToTexture && + (sampler->MinFilter == GL_NEAREST || + sampler->MinFilter == GL_LINEAR))) { intelObj->_MaxLevel = tObj->BaseLevel; } else { intelObj->_MaxLevel = tObj->_MaxLevel; @@ -142,10 +144,9 @@ intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit) width, height, depth, - true, 0 /* num_samples */, INTEL_MIPTREE_TILING_ANY, - false); + MIPTREE_LAYOUT_ACCELERATED_UPLOAD); if (!intelObj->mt) return false; } diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp index 206a76e9242..8010fb4f610 100644 --- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp @@ -26,11 +26,13 @@ #include "brw_cfg.h" #include "program/program.h" +using namespace brw; + class cmod_propagation_test : public ::testing::Test { virtual void SetUp(); public: - struct brw_context *brw; + struct brw_compiler *compiler; struct brw_device_info *devinfo; struct gl_context *ctx; struct brw_wm_prog_data *prog_data; @@ -42,30 +44,31 @@ public: class cmod_propagation_fs_visitor : public fs_visitor { public: - cmod_propagation_fs_visitor(struct brw_context *brw, + cmod_propagation_fs_visitor(struct brw_compiler *compiler, struct brw_wm_prog_data *prog_data, struct gl_shader_program *shader_prog) - : fs_visitor(brw, NULL, NULL, prog_data, shader_prog, NULL, 8) {} + : fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL, + &prog_data->base, shader_prog, + (struct gl_program *) NULL, 8, -1) {} }; void cmod_propagation_test::SetUp() { - brw = (struct brw_context *)calloc(1, sizeof(*brw)); - devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw)); - brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen)); - brw->intelScreen->devinfo = devinfo; - ctx = &brw->ctx; + ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); + compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); + devinfo = (struct 
brw_device_info *)calloc(1, sizeof(*devinfo)); + compiler->devinfo = devinfo; fp = ralloc(NULL, struct brw_fragment_program); prog_data = ralloc(NULL, struct brw_wm_prog_data); shader_prog = ralloc(NULL, struct gl_shader_program); - v = new cmod_propagation_fs_visitor(brw, prog_data, shader_prog); + v = new cmod_propagation_fs_visitor(compiler, prog_data, shader_prog); _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0); - brw->gen = devinfo->gen = 4; + devinfo->gen = 4; } static fs_inst * @@ -100,13 +103,13 @@ cmod_propagation(fs_visitor *v) TEST_F(cmod_propagation_test, basic) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); - v->emit(BRW_OPCODE_ADD, dest, src0, src1); - v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.ADD(dest, src0, src1); + bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE); /* = Before = * @@ -132,13 +135,13 @@ TEST_F(cmod_propagation_test, basic) TEST_F(cmod_propagation_test, cmp_nonzero) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg nonzero(1.0f); - v->emit(BRW_OPCODE_ADD, dest, src0, src1); - v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, nonzero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.ADD(dest, src0, src1); + bld.CMP(bld.null_reg_f(), dest, nonzero, BRW_CONDITIONAL_GE); /* = Before = * @@ -165,12 +168,12 @@ TEST_F(cmod_propagation_test, cmp_nonzero) TEST_F(cmod_propagation_test, non_cmod_instruction) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::uint_type); fs_reg src0 = v->vgrf(glsl_type::uint_type); fs_reg zero(0u); - v->emit(BRW_OPCODE_FBL, dest, src0); - v->emit(BRW_OPCODE_CMP, v->reg_null_ud, dest, zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.FBL(dest, src0); + bld.CMP(bld.null_reg_ud(), dest, zero, BRW_CONDITIONAL_GE); /* = Before = * @@ -197,16 +200,15 @@ TEST_F(cmod_propagation_test, non_cmod_instruction) TEST_F(cmod_propagation_test, intervening_flag_write) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); - v->emit(BRW_OPCODE_ADD, dest, src0, src1); - v->emit(BRW_OPCODE_CMP, v->reg_null_f, src2, zero) - ->conditional_mod = BRW_CONDITIONAL_GE; - v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.ADD(dest, src0, src1); + bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE); + bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE); /* = Before = * @@ -236,17 +238,16 @@ TEST_F(cmod_propagation_test, intervening_flag_write) TEST_F(cmod_propagation_test, intervening_flag_read) { + const fs_builder &bld = v->bld; fs_reg dest0 = v->vgrf(glsl_type::float_type); fs_reg dest1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); - v->emit(BRW_OPCODE_ADD, dest0, src0, src1); - v->emit(BRW_OPCODE_SEL, dest1, src2, zero) - ->predicate = BRW_PREDICATE_NORMAL; - v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest0, zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.ADD(dest0, src0, src1); + 
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); + bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE); /* = Before = * @@ -276,16 +277,16 @@ TEST_F(cmod_propagation_test, intervening_flag_read) TEST_F(cmod_propagation_test, intervening_dest_write) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::vec4_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::vec2_type); fs_reg zero(0.0f); - v->emit(BRW_OPCODE_ADD, offset(dest, 2), src0, src1); - v->emit(SHADER_OPCODE_TEX, dest, src2) + bld.ADD(offset(dest, 2), src0, src1); + bld.emit(SHADER_OPCODE_TEX, dest, src2) ->regs_written = 4; - v->emit(BRW_OPCODE_CMP, v->reg_null_f, offset(dest, 2), zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.CMP(bld.null_reg_f(), offset(dest, 2), zero, BRW_CONDITIONAL_GE); /* = Before = * @@ -316,18 +317,16 @@ TEST_F(cmod_propagation_test, intervening_dest_write) TEST_F(cmod_propagation_test, intervening_flag_read_same_value) { + const fs_builder &bld = v->bld; fs_reg dest0 = v->vgrf(glsl_type::float_type); fs_reg dest1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); - v->emit(BRW_OPCODE_ADD, dest0, src0, src1) - ->conditional_mod = BRW_CONDITIONAL_GE; - v->emit(BRW_OPCODE_SEL, dest1, src2, zero) - ->predicate = BRW_PREDICATE_NORMAL; - v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest0, zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1)); + set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); + bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE); /* = Before = * @@ -357,14 +356,14 @@ TEST_F(cmod_propagation_test, intervening_flag_read_same_value) TEST_F(cmod_propagation_test, negate) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); - v->emit(BRW_OPCODE_ADD, dest, src0, src1); + bld.ADD(dest, src0, src1); dest.negate = true; - v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE); /* = Before = * @@ -390,13 +389,13 @@ TEST_F(cmod_propagation_test, negate) TEST_F(cmod_propagation_test, movnz) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_CMP, dest, src0, src1) - ->conditional_mod = BRW_CONDITIONAL_GE; - v->emit(BRW_OPCODE_MOV, v->reg_null_f, dest) - ->conditional_mod = BRW_CONDITIONAL_NZ; + bld.CMP(dest, src0, src1, BRW_CONDITIONAL_GE); + set_condmod(BRW_CONDITIONAL_NZ, + bld.MOV(bld.null_reg_f(), dest)); /* = Before = * @@ -422,14 +421,14 @@ TEST_F(cmod_propagation_test, movnz) TEST_F(cmod_propagation_test, different_types_cmod_with_zero) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::int_type); fs_reg src1 = v->vgrf(glsl_type::int_type); fs_reg zero(0.0f); - v->emit(BRW_OPCODE_ADD, dest, src0, src1); - v->emit(BRW_OPCODE_CMP, v->reg_null_f, retype(dest, BRW_REGISTER_TYPE_F), - zero) - ->conditional_mod = BRW_CONDITIONAL_GE; + bld.ADD(dest, src0, src1); + bld.CMP(bld.null_reg_f(), retype(dest, BRW_REGISTER_TYPE_F), zero, + 
BRW_CONDITIONAL_GE); /* = Before = * @@ -456,15 +455,15 @@ TEST_F(cmod_propagation_test, different_types_cmod_with_zero) TEST_F(cmod_propagation_test, andnz_one) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); fs_reg one(1); - v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero) - ->conditional_mod = BRW_CONDITIONAL_L; - v->emit(BRW_OPCODE_AND, v->reg_null_d, dest, one) - ->conditional_mod = BRW_CONDITIONAL_NZ; + bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); + set_condmod(BRW_CONDITIONAL_NZ, + bld.AND(bld.null_reg_d(), dest, one)); /* = Before = * 0: cmp.l.f0(8) dest:F src0:F 0F @@ -491,15 +490,15 @@ TEST_F(cmod_propagation_test, andnz_one) TEST_F(cmod_propagation_test, andnz_non_one) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); fs_reg nonone(38); - v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero) - ->conditional_mod = BRW_CONDITIONAL_L; - v->emit(BRW_OPCODE_AND, v->reg_null_d, dest, nonone) - ->conditional_mod = BRW_CONDITIONAL_NZ; + bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); + set_condmod(BRW_CONDITIONAL_NZ, + bld.AND(bld.null_reg_d(), dest, nonone)); /* = Before = * 0: cmp.l.f0(8) dest:F src0:F 0F @@ -526,15 +525,15 @@ TEST_F(cmod_propagation_test, andnz_non_one) TEST_F(cmod_propagation_test, andz_one) { + const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg zero(0.0f); fs_reg one(1); - v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero) - ->conditional_mod = BRW_CONDITIONAL_L; - v->emit(BRW_OPCODE_AND, v->reg_null_d, dest, one) - ->conditional_mod = BRW_CONDITIONAL_Z; + bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); + set_condmod(BRW_CONDITIONAL_Z, + bld.AND(bld.null_reg_d(), dest, one)); /* = Before = * 0: cmp.l.f0(8) dest:F src0:F 0F diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp index 4c91af3ea8d..3ef0cb319eb 100644 --- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp @@ -26,11 +26,13 @@ #include "brw_cfg.h" #include "program/program.h" +using namespace brw; + class saturate_propagation_test : public ::testing::Test { virtual void SetUp(); public: - struct brw_context *brw; + struct brw_compiler *compiler; struct brw_device_info *devinfo; struct gl_context *ctx; struct brw_wm_prog_data *prog_data; @@ -42,30 +44,31 @@ public: class saturate_propagation_fs_visitor : public fs_visitor { public: - saturate_propagation_fs_visitor(struct brw_context *brw, + saturate_propagation_fs_visitor(struct brw_compiler *compiler, struct brw_wm_prog_data *prog_data, struct gl_shader_program *shader_prog) - : fs_visitor(brw, NULL, NULL, prog_data, shader_prog, NULL, 8) {} + : fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL, + &prog_data->base, shader_prog, + (struct gl_program *) NULL, 8, -1) {} }; void saturate_propagation_test::SetUp() { - brw = (struct brw_context *)calloc(1, sizeof(*brw)); - devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw)); - brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen)); - brw->intelScreen->devinfo = devinfo; - ctx = &brw->ctx; + ctx = (struct 
gl_context *)calloc(1, sizeof(*ctx)); + compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); + devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo)); + compiler->devinfo = devinfo; fp = ralloc(NULL, struct brw_fragment_program); prog_data = ralloc(NULL, struct brw_wm_prog_data); shader_prog = ralloc(NULL, struct gl_shader_program); - v = new saturate_propagation_fs_visitor(brw, prog_data, shader_prog); + v = new saturate_propagation_fs_visitor(compiler, prog_data, shader_prog); _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0); - brw->gen = devinfo->gen = 4; + devinfo->gen = 4; } static fs_inst * @@ -100,13 +103,13 @@ saturate_propagation(fs_visitor *v) TEST_F(saturate_propagation_test, basic) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_ADD, dst0, src0, src1); - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; + bld.ADD(dst0, src0, src1); + set_saturate(true, bld.MOV(dst1, dst0)); /* = Before = * @@ -135,15 +138,15 @@ TEST_F(saturate_propagation_test, basic) TEST_F(saturate_propagation_test, other_non_saturated_use) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg dst2 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_ADD, dst0, src0, src1); - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; - v->emit(BRW_OPCODE_ADD, dst2, dst0, src0); + bld.ADD(dst0, src0, src1); + set_saturate(true, bld.MOV(dst1, dst0)); + bld.ADD(dst2, dst0, src0); /* = Before = * @@ -173,14 +176,14 @@ TEST_F(saturate_propagation_test, other_non_saturated_use) TEST_F(saturate_propagation_test, predicated_instruction) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_ADD, dst0, src0, src1) + bld.ADD(dst0, src0, src1) ->predicate = BRW_PREDICATE_NORMAL; - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; + set_saturate(true, bld.MOV(dst1, dst0)); /* = Before = * @@ -208,14 +211,14 @@ TEST_F(saturate_propagation_test, predicated_instruction) TEST_F(saturate_propagation_test, neg_mov_sat) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_ADD, dst0, src0, src1); + bld.ADD(dst0, src0, src1); dst0.negate = true; - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; + set_saturate(true, bld.MOV(dst1, dst0)); /* = Before = * @@ -243,14 +246,14 @@ TEST_F(saturate_propagation_test, neg_mov_sat) TEST_F(saturate_propagation_test, abs_mov_sat) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_ADD, dst0, src0, src1); + bld.ADD(dst0, src0, src1); dst0.abs = true; - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; + set_saturate(true, bld.MOV(dst1, dst0)); /* = Before = * @@ -278,16 +281,15 @@ TEST_F(saturate_propagation_test, 
abs_mov_sat) TEST_F(saturate_propagation_test, producer_saturates) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg dst2 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_ADD, dst0, src0, src1) - ->saturate = true; - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; - v->emit(BRW_OPCODE_MOV, dst2, dst0); + set_saturate(true, bld.ADD(dst0, src0, src1)); + set_saturate(true, bld.MOV(dst1, dst0)); + bld.MOV(dst2, dst0); /* = Before = * @@ -318,16 +320,15 @@ TEST_F(saturate_propagation_test, producer_saturates) TEST_F(saturate_propagation_test, intervening_saturating_copy) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg dst2 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_ADD, dst0, src0, src1); - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; - v->emit(BRW_OPCODE_MOV, dst2, dst0) - ->saturate = true; + bld.ADD(dst0, src0, src1); + set_saturate(true, bld.MOV(dst1, dst0)); + set_saturate(true, bld.MOV(dst2, dst0)); /* = Before = * @@ -360,16 +361,16 @@ TEST_F(saturate_propagation_test, intervening_saturating_copy) TEST_F(saturate_propagation_test, intervening_dest_write) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::vec4_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::vec2_type); - v->emit(BRW_OPCODE_ADD, offset(dst0, 2), src0, src1); - v->emit(SHADER_OPCODE_TEX, dst0, src2) + bld.ADD(offset(dst0, 2), src0, src1); + bld.emit(SHADER_OPCODE_TEX, dst0, src2) ->regs_written = 4; - v->emit(BRW_OPCODE_MOV, dst1, offset(dst0, 2)) - ->saturate = true; + set_saturate(true, bld.MOV(dst1, offset(dst0, 2))); /* = Before = * @@ -400,18 +401,17 @@ TEST_F(saturate_propagation_test, intervening_dest_write) TEST_F(saturate_propagation_test, mul_neg_mov_sat_mov_sat) { + const fs_builder &bld = v->bld; fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg dst2 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - v->emit(BRW_OPCODE_MUL, dst0, src0, src1); + bld.MUL(dst0, src0, src1); dst0.negate = true; - v->emit(BRW_OPCODE_MOV, dst1, dst0) - ->saturate = true; + set_saturate(true, bld.MOV(dst1, dst0)); dst0.negate = false; - v->emit(BRW_OPCODE_MOV, dst2, dst0) - ->saturate = true; + set_saturate(true, bld.MOV(dst2, dst0)); /* = Before = * diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp index 2ef52e9fd6b..84e43fa75cd 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp @@ -33,7 +33,7 @@ class copy_propagation_test : public ::testing::Test { virtual void SetUp(); public: - struct brw_context *brw; + struct brw_compiler *compiler; struct brw_device_info *devinfo; struct gl_context *ctx; struct gl_shader_program *shader_prog; @@ -44,12 +44,11 @@ public: class copy_propagation_vec4_visitor : public vec4_visitor { public: - copy_propagation_vec4_visitor(struct brw_context *brw, + 
copy_propagation_vec4_visitor(struct brw_compiler *compiler, struct gl_shader_program *shader_prog) - : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog, + : vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog, MESA_SHADER_VERTEX, NULL, - false /* no_spills */, - ST_NONE, ST_NONE, ST_NONE) + false /* no_spills */, -1) { } @@ -93,21 +92,20 @@ protected: void copy_propagation_test::SetUp() { - brw = (struct brw_context *)calloc(1, sizeof(*brw)); - devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw)); - brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen)); - brw->intelScreen->devinfo = devinfo; - ctx = &brw->ctx; + ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); + compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); + devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo)); + compiler->devinfo = devinfo; vp = ralloc(NULL, struct brw_vertex_program); shader_prog = ralloc(NULL, struct gl_shader_program); - v = new copy_propagation_vec4_visitor(brw, shader_prog); + v = new copy_propagation_vec4_visitor(compiler, shader_prog); _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0); - brw->gen = devinfo->gen = 4; + devinfo->gen = 4; } static void diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp index c8c67574e95..de2afd39cfe 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp @@ -35,7 +35,7 @@ class register_coalesce_test : public ::testing::Test { virtual void SetUp(); public: - struct brw_context *brw; + struct brw_compiler *compiler; struct brw_device_info *devinfo; struct gl_context *ctx; struct gl_shader_program *shader_prog; @@ -47,12 +47,11 @@ public: class register_coalesce_vec4_visitor : public vec4_visitor { public: - register_coalesce_vec4_visitor(struct brw_context *brw, + register_coalesce_vec4_visitor(struct brw_compiler *compiler, struct gl_shader_program *shader_prog) - : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog, + : vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog, MESA_SHADER_VERTEX, NULL, - false /* no_spills */, - ST_NONE, ST_NONE, ST_NONE) + false /* no_spills */, -1) { } @@ -96,21 +95,20 @@ protected: void register_coalesce_test::SetUp() { - brw = (struct brw_context *)calloc(1, sizeof(*brw)); - devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw)); - brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen)); - brw->intelScreen->devinfo = devinfo; - ctx = &brw->ctx; + ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); + compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); + devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo)); + compiler->devinfo = devinfo; vp = ralloc(NULL, struct brw_vertex_program); shader_prog = ralloc(NULL, struct gl_shader_program); - v = new register_coalesce_vec4_visitor(brw, shader_prog); + v = new register_coalesce_vec4_visitor(compiler, shader_prog); _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0); - brw->gen = devinfo->gen = 4; + devinfo->gen = 4; } static void diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c index 6c479f5f0c6..c78d4baa124 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c @@ -242,7 +242,7 @@ static void nouveau_framebuffer_renderbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, GLenum 
attachment, struct gl_renderbuffer *rb) { - _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb); + _mesa_FramebufferRenderbuffer_sw(ctx, fb, attachment, rb); context_dirty(ctx, FRAMEBUFFER); } diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c index c0c7b26bbf7..1398385b262 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c +++ b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c @@ -31,6 +31,8 @@ #include "nv10_3d.xml.h" #include "nv10_driver.h" +#include "util/simple_list.h" + void nv10_emit_clip_plane(struct gl_context *ctx, int emit) { diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c b/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c index f0acbed8560..41395516ea4 100644 --- a/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c +++ b/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c @@ -32,6 +32,8 @@ #include "nv10_driver.h" #include "nv20_driver.h" +#include "util/simple_list.h" + #define LIGHT_MODEL_AMBIENT_R(side) \ ((side) ? NV20_3D_LIGHT_MODEL_BACK_AMBIENT_R : \ NV20_3D_LIGHT_MODEL_FRONT_AMBIENT_R) diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index b0a6bd573b6..6fe70b5c9d0 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -2215,9 +2215,9 @@ GLboolean r200ValidateState( struct gl_context *ctx ) GLuint new_state = rmesa->radeon.NewGLState; if (new_state & _NEW_BUFFERS) { - _mesa_update_framebuffer(ctx); + _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); /* this updates the DrawBuffer's Width/Height if it's a FBO */ - _mesa_update_draw_buffer_bounds(ctx); + _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); R200_STATECHANGE(rmesa, ctx); } diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 0ca526d2a02..2a8bd6c9edc 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -220,9 +220,9 @@ void radeon_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb) */ if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ - _mesa_update_framebuffer(ctx); + _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); /* this updates the DrawBuffer's Width/Height if it's a FBO */ - _mesa_update_draw_buffer_bounds(ctx); + _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); } if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) { diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index 97022f95953..ef62d097bae 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -723,7 +723,7 @@ radeon_framebuffer_renderbuffer(struct gl_context * ctx, "%s(%p, fb %p, rb %p) \n", __func__, ctx, fb, rb); - _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb); + _mesa_FramebufferRenderbuffer_sw(ctx, fb, attachment, rb); radeon_draw_buffer(ctx, fb); } diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index c45bb513dca..cba3d9c9689 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -1994,9 +1994,9 @@ GLboolean radeonValidateState( struct gl_context *ctx ) GLuint new_state = rmesa->radeon.NewGLState; if (new_state & _NEW_BUFFERS) { - _mesa_update_framebuffer(ctx); + _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); /* this updates the 
DrawBuffer's Width/Height if it's a FBO */ - _mesa_update_draw_buffer_bounds(ctx); + _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); RADEON_STATECHANGE(rmesa, ctx); } diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index 2ddb474dde7..2d4bb702fc2 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -62,7 +62,9 @@ #include "swrast/s_context.h" #include <sys/types.h> -#include <sys/sysctl.h> +#ifdef HAVE_SYS_SYSCTL_H +# include <sys/sysctl.h> +#endif const __DRIextension **__driDriverGetExtensions_swrast(void); @@ -958,6 +960,7 @@ static const __DRIextension *swrast_driver_extensions[] = { &driCoreExtension.base, &driSWRastExtension.base, &driCopySubBufferExtension.base, + &dri2ConfigQueryExtension.base, &swrast_vtable.base, NULL }; diff --git a/src/mesa/drivers/haiku/swrast/SConscript b/src/mesa/drivers/haiku/swrast/SConscript deleted file mode 100644 index 907325e3252..00000000000 --- a/src/mesa/drivers/haiku/swrast/SConscript +++ /dev/null @@ -1,33 +0,0 @@ -Import('*') - -env = env.Clone() - -env.Append(CPPPATH = [ - '#/src', - '#/src/mapi', - '#/src/mesa', - '#/src/mesa/main', - '#/include/HaikuGL', - '/boot/system/develop/headers/private', - Dir('../../../mapi'), # src/mapi build path for python-generated GL API files/headers -]) - -env.Prepend(LIBS = [ - mesautil, - glsl, - mesa, -]) - -env.Prepend(LIBS = [libgl]) - -sources = [ - 'SoftwareRast.cpp' -] - -# Disallow undefined symbols -#env.Append(SHLINKFLAGS = ['-Wl,-z,defs']) - -libswrast = env.SharedLibrary( - target = 'swrast', - source = sources -) diff --git a/src/mesa/drivers/haiku/swrast/SoftwareRast.cpp b/src/mesa/drivers/haiku/swrast/SoftwareRast.cpp deleted file mode 100644 index 813ad1ff27d..00000000000 --- a/src/mesa/drivers/haiku/swrast/SoftwareRast.cpp +++ /dev/null @@ -1,697 +0,0 @@ -/* - * Copyright 2006-2012, Haiku, Inc. All rights reserved. - * Distributed under the terms of the MIT License. - * - * Authors: - * Jérôme Duval, [email protected] - * Philippe Houdoin, [email protected] - * Artur Wyszynski, [email protected] - * Alexander von Gluck, [email protected] - */ - - -#include <kernel/image.h> -#include "SoftwareRast.h" - -#include <Autolock.h> -#include <interface/DirectWindowPrivate.h> -#include <GraphicsDefs.h> -#include <Screen.h> -#include <stdio.h> -#include <string.h> - -extern "C" { -#include "extensions.h" -#include "drivers/common/driverfuncs.h" -#include "drivers/common/meta.h" -#include "main/api_exec.h" -#include "main/colormac.h" -#include "main/cpuinfo.h" -#include "main/buffers.h" -#include "main/formats.h" -#include "main/framebuffer.h" -#include "main/renderbuffer.h" -#include "main/version.h" -#include "main/vtxfmt.h" -#include "swrast/swrast.h" -#include "swrast/s_renderbuffer.h" -#include "swrast_setup/swrast_setup.h" -#include "tnl/tnl.h" -#include "tnl/t_context.h" -#include "tnl/t_pipeline.h" -#include "vbo/vbo.h" - - -#ifdef DEBUG -# define TRACE(x...) printf("MesaSoftwareRast: " x) -# define CALLED() printf("MesaSoftwareRast: %s\n", __PRETTY_FUNCTION__) -#else -# define TRACE(x...) -# define CALLED() -#endif - -#define ERROR(x...) 
printf("MesaSoftwareRast: " x) -} - - -extern const char* color_space_name(color_space space); - - -extern "C" _EXPORT BGLRenderer* -instantiate_gl_renderer(BGLView* view, ulong options, - BGLDispatcher* dispatcher) -{ - return new MesaSoftwareRast(view, options, dispatcher); -} - - -MesaSoftwareRast::MesaSoftwareRast(BGLView* view, ulong options, - BGLDispatcher* dispatcher) - : BGLRenderer(view, options, dispatcher), - fBitmap(NULL), - fDirectModeEnabled(false), - fInfo(NULL), - fInfoLocker("info locker"), - fVisual(NULL), - fFrameBuffer(NULL), - fFrontRenderBuffer(NULL), - fBackRenderBuffer(NULL), - fColorSpace(B_NO_COLOR_SPACE) -{ - CALLED(); - - fColorSpace = BScreen(GLView()->Window()).ColorSpace(); - - // We force single buffering for the time being - options &= ~BGL_DOUBLE; - - const GLboolean rgbFlag = ((options & BGL_INDEX) == 0); - const GLboolean alphaFlag = ((options & BGL_ALPHA) == BGL_ALPHA); - const GLboolean dblFlag = ((options & BGL_DOUBLE) == BGL_DOUBLE); - const GLboolean stereoFlag = false; - const GLint depth = (options & BGL_DEPTH) ? 16 : 0; - const GLint stencil = (options & BGL_STENCIL) ? 8 : 0; - const GLint accum = (options & BGL_ACCUM) ? 16 : 0; - const GLint red = rgbFlag ? 8 : 0; - const GLint green = rgbFlag ? 8 : 0; - const GLint blue = rgbFlag ? 8 : 0; - const GLint alpha = alphaFlag ? 8 : 0; - - fOptions = options; // | BGL_INDIRECT; - struct dd_function_table functions; - - fVisual = _mesa_create_visual(dblFlag, stereoFlag, red, green, - blue, alpha, depth, stencil, accum, accum, accum, - alpha ? accum : 0, 1); - - // Initialize device driver function table - _mesa_init_driver_functions(&functions); - - functions.GetString = _GetString; - functions.UpdateState = _UpdateState; - functions.MapRenderbuffer = _RenderBufferMap; - functions.Flush = _Flush; - - // create core context - // We inherit gl_context to this class - _mesa_initialize_context(this, API_OPENGL_COMPAT, fVisual, NULL, - &functions); - - /* Initialize the software rasterizer and helper modules. 
*/ - _swrast_CreateContext(this); - _vbo_CreateContext(this); - _tnl_CreateContext(this); - _swsetup_CreateContext(this); - _swsetup_Wakeup(this); - - // Use default TCL pipeline - TNL_CONTEXT(this)->Driver.RunPipeline = _tnl_run_pipeline; - - _mesa_meta_init(this); - _mesa_enable_sw_extensions(this); - - _mesa_compute_version(this); - - _mesa_initialize_dispatch_tables(this); - _mesa_initialize_vbo_vtxfmt(this); - - // create core framebuffer - fFrameBuffer = _mesa_create_framebuffer(fVisual); - if (fFrameBuffer == NULL) { - ERROR("%s: Unable to calloc GL FrameBuffer!\n", __func__); - _mesa_destroy_visual(fVisual); - return; - } - - // Setup front render buffer - fFrontRenderBuffer = _NewRenderBuffer(true); - if (fFrontRenderBuffer == NULL) { - ERROR("%s: FrontRenderBuffer is requested but unallocated!\n", - __func__); - _mesa_destroy_visual(fVisual); - free(fFrameBuffer); - return; - } - _mesa_add_renderbuffer(fFrameBuffer, BUFFER_FRONT_LEFT, - &fFrontRenderBuffer->Base); - - // Setup back render buffer (if requested) - if (fVisual->doubleBufferMode) { - fBackRenderBuffer = _NewRenderBuffer(false); - if (fBackRenderBuffer == NULL) { - ERROR("%s: BackRenderBuffer is requested but unallocated!\n", - __func__); - _mesa_destroy_visual(fVisual); - free(fFrameBuffer); - return; - } - _mesa_add_renderbuffer(fFrameBuffer, BUFFER_BACK_LEFT, - &fBackRenderBuffer->Base); - } - - _swrast_add_soft_renderbuffers(fFrameBuffer, GL_FALSE, - fVisual->haveDepthBuffer, fVisual->haveStencilBuffer, - fVisual->haveAccumBuffer, alphaFlag, GL_FALSE); - - BRect bounds = view->Bounds(); - fWidth = (GLint)bounds.Width(); - fHeight = (GLint)bounds.Height(); - - // some stupid applications (Quake2) don't even think about calling LockGL() - // before using glGetString and its glGet*() friends... - // so make sure there is at least a valid context. 
- - if (!_mesa_get_current_context()) { - LockGL(); - // not needed, we don't have a looper yet: UnlockLooper(); - } -} - - -MesaSoftwareRast::~MesaSoftwareRast() -{ - CALLED(); - _swsetup_DestroyContext(this); - _swrast_DestroyContext(this); - _tnl_DestroyContext(this); - _vbo_DestroyContext(this); - _mesa_destroy_visual(fVisual); - _mesa_destroy_framebuffer(fFrameBuffer); - _mesa_destroy_context(this); - - free(fInfo); - free(fFrameBuffer); - - delete fBitmap; -} - - -void -MesaSoftwareRast::LockGL() -{ - CALLED(); - BGLRenderer::LockGL(); - - _mesa_make_current(this, fFrameBuffer, fFrameBuffer); - - color_space colorSpace = BScreen(GLView()->Window()).ColorSpace(); - - GLuint width = fWidth; - GLuint height = fHeight; - - BAutolock lock(fInfoLocker); - if (fDirectModeEnabled && fInfo != NULL) { - width = fInfo->window_bounds.right - - fInfo->window_bounds.left + 1; - height = fInfo->window_bounds.bottom - - fInfo->window_bounds.top + 1; - } - - if (fColorSpace != colorSpace) { - fColorSpace = colorSpace; - _SetupRenderBuffer(&fFrontRenderBuffer->Base, fColorSpace); - if (fVisual->doubleBufferMode) - _SetupRenderBuffer(&fBackRenderBuffer->Base, fColorSpace); - } - - _CheckResize(width, height); -} - - -void -MesaSoftwareRast::UnlockGL() -{ - CALLED(); - _mesa_make_current(this, NULL, NULL); - BGLRenderer::UnlockGL(); -} - - -void -MesaSoftwareRast::SwapBuffers(bool VSync) -{ - CALLED(); - - if (!fBitmap) - return; - - if (fVisual->doubleBufferMode) - _mesa_notifySwapBuffers(this); - - if (!fDirectModeEnabled || fInfo == NULL) { - if (GLView()->LockLooperWithTimeout(1000) == B_OK) { - GLView()->DrawBitmap(fBitmap, B_ORIGIN); - GLView()->UnlockLooper(); - } - } else { - // TODO: Here the BGLView needs to be drawlocked. - _CopyToDirect(); - } - - if (VSync) { - BScreen screen(GLView()->Window()); - screen.WaitForRetrace(); - } -} - - -void -MesaSoftwareRast::Draw(BRect updateRect) -{ - CALLED(); - if (fBitmap && (!fDirectModeEnabled || (fInfo == NULL))) - GLView()->DrawBitmap(fBitmap, updateRect, updateRect); -} - - -status_t -MesaSoftwareRast::CopyPixelsOut(BPoint location, BBitmap* bitmap) -{ - CALLED(); - color_space scs = fBitmap->ColorSpace(); - color_space dcs = bitmap->ColorSpace(); - - if (scs != dcs && (scs != B_RGBA32 || dcs != B_RGB32)) { - fprintf(stderr, "CopyPixelsOut(): incompatible color space: %s != %s\n", - color_space_name(scs), - color_space_name(dcs)); - return B_BAD_TYPE; - } - - BRect sr = fBitmap->Bounds(); - BRect dr = bitmap->Bounds(); - - sr = sr & dr.OffsetBySelf(location); - dr = sr.OffsetByCopy(-location.x, -location.y); - - uint8* ps = (uint8*)fBitmap->Bits(); - uint8* pd = (uint8*)bitmap->Bits(); - uint32* s; - uint32* d; - uint32 y; - for (y = (uint32)sr.top; y <= (uint32)sr.bottom; y++) { - s = (uint32*)(ps + y * fBitmap->BytesPerRow()); - s += (uint32)sr.left; - - d = (uint32*)(pd + (y + (uint32)(dr.top - sr.top)) - * bitmap->BytesPerRow()); - d += (uint32)dr.left; - - memcpy(d, s, dr.IntegerWidth() * 4); - } - return B_OK; -} - - -status_t -MesaSoftwareRast::CopyPixelsIn(BBitmap* bitmap, BPoint location) -{ - CALLED(); - color_space scs = bitmap->ColorSpace(); - color_space dcs = fBitmap->ColorSpace(); - - if (scs != dcs && (dcs != B_RGBA32 || scs != B_RGB32)) { - fprintf(stderr, "CopyPixelsIn(): incompatible color space: %s != %s\n", - color_space_name(scs), - color_space_name(dcs)); - return B_BAD_TYPE; - } - - BRect sr = bitmap->Bounds(); - BRect dr = fBitmap->Bounds(); - - sr = sr & dr.OffsetBySelf(location); - dr = sr.OffsetByCopy(-location.x, 
-location.y); - - uint8* ps = (uint8*)bitmap->Bits(); - uint8* pd = (uint8*)fBitmap->Bits(); - uint32* s; - uint32* d; - uint32 y; - for (y = (uint32)sr.top; y <= (uint32)sr.bottom; y++) { - s = (uint32*)(ps + y * bitmap->BytesPerRow()); - s += (uint32)sr.left; - - d = (uint32*)(pd + (y + (uint32)(dr.top - sr.top)) - * fBitmap->BytesPerRow()); - d += (uint32)dr.left; - - memcpy(d, s, dr.IntegerWidth() * 4); - } - return B_OK; -} - - -void -MesaSoftwareRast::EnableDirectMode(bool enabled) -{ - fDirectModeEnabled = enabled; -} - - -void -MesaSoftwareRast::DirectConnected(direct_buffer_info* info) -{ - // TODO: I'm not sure we need to do this: BGLView already - // keeps a local copy of the direct_buffer_info passed by - // BDirectWindow::DirectConnected(). - BAutolock lock(fInfoLocker); - if (info) { - if (!fInfo) { - fInfo = (direct_buffer_info*)malloc(DIRECT_BUFFER_INFO_AREA_SIZE); - if (!fInfo) - return; - } - memcpy(fInfo, info, DIRECT_BUFFER_INFO_AREA_SIZE); - } else if (fInfo) { - free(fInfo); - fInfo = NULL; - } -} - - -void -MesaSoftwareRast::FrameResized(float width, float height) -{ - BAutolock lock(fInfoLocker); - _CheckResize((GLuint)width, (GLuint)height); -} - - -void -MesaSoftwareRast::_CheckResize(GLuint newWidth, GLuint newHeight) -{ - CALLED(); - - if (fBitmap && newWidth == fWidth - && newHeight == fHeight) { - return; - } - - _mesa_resize_framebuffer(this, fFrameBuffer, newWidth, newHeight); - fHeight = newHeight; - fWidth = newWidth; - - _AllocateBitmap(); -} - - -void -MesaSoftwareRast::_AllocateBitmap() -{ - CALLED(); - - // allocate new size of back buffer bitmap - delete fBitmap; - fBitmap = NULL; - - if (fWidth < 1 || fHeight < 1) { - TRACE("%s: Cannot allocate bitmap < 1x1!\n", __func__); - return; - } - - BRect rect(0.0, 0.0, fWidth - 1, fHeight - 1); - fBitmap = new BBitmap(rect, fColorSpace); - - #if 0 - // Used for platform optimized drawing - for (uint i = 0; i < fHeight; i++) { - fRowAddr[fHeight - i - 1] = (GLvoid *)((GLubyte *)fBitmap->Bits() - + i * fBitmap->BytesPerRow()); - } - #endif - - fFrameBuffer->Width = fWidth; - fFrameBuffer->Height = fHeight; - TRACE("%s: Bitmap Size: %" B_PRIu32 "\n", __func__, fBitmap->BitsLength()); - - fFrontRenderBuffer->Buffer = (GLubyte*)fBitmap->Bits(); -} - - -// #pragma mark - static - - -const GLubyte* -MesaSoftwareRast::_GetString(gl_context* ctx, GLenum name) -{ - switch (name) { - case GL_VENDOR: - return (const GLubyte*) "Mesa Project"; - case GL_RENDERER: - return (const GLubyte*) "Software Rasterizer"; - default: - // Let core library handle all other cases - return NULL; - } -} - - -void -MesaSoftwareRast::_UpdateState(gl_context* ctx, GLuint new_state) -{ - if (!ctx) - return; - - CALLED(); - _swrast_InvalidateState(ctx, new_state); - _swsetup_InvalidateState(ctx, new_state); - _vbo_InvalidateState(ctx, new_state); - _tnl_InvalidateState(ctx, new_state); -} - - -GLboolean -MesaSoftwareRast::_RenderBufferStorage(gl_context* ctx, - struct gl_renderbuffer* render, GLenum internalFormat, - GLuint width, GLuint height) -{ - CALLED(); - - render->Width = width; - render->Height = height; - - struct swrast_renderbuffer *swRenderBuffer = swrast_renderbuffer(render); - - swRenderBuffer->RowStride = width * _mesa_get_format_bytes(render->Format); - - return GL_TRUE; -} - - -GLboolean -MesaSoftwareRast::_RenderBufferStorageMalloc(gl_context* ctx, - struct gl_renderbuffer* render, GLenum internalFormat, - GLuint width, GLuint height) -{ - CALLED(); - - render->Width = width; - render->Height = height; - - struct 
swrast_renderbuffer *swRenderBuffer = swrast_renderbuffer(render); - - if (swRenderBuffer != NULL) { - free(swRenderBuffer->Buffer); - swRenderBuffer->RowStride - = width * _mesa_get_format_bytes(render->Format); - - uint32 size = swRenderBuffer->RowStride * height; - TRACE("%s: Allocate %" B_PRIu32 " bytes for RenderBuffer\n", - __func__, size); - swRenderBuffer->Buffer = (GLubyte*)malloc(size); - if (!swRenderBuffer->Buffer) { - ERROR("%s: Memory allocation failure!\n", __func__); - return GL_FALSE; - } - } else { - ERROR("%s: Couldn't obtain software renderbuffer!\n", - __func__); - return GL_FALSE; - } - - return GL_TRUE; -} - - -void -MesaSoftwareRast::_Flush(gl_context* ctx) -{ - CALLED(); - MesaSoftwareRast* driverContext = static_cast<MesaSoftwareRast*>(ctx); - - //MesaSoftwareRast* driverContext = (MesaSoftwareRast*)ctx->DriverCtx; - if ((driverContext->fOptions & BGL_DOUBLE) == 0) { - // TODO: SwapBuffers() can call _CopyToDirect(), which should - // be always called with with the BGLView drawlocked. - // This is not always the case if called from here. - driverContext->SwapBuffers(); - } -} - - -struct swrast_renderbuffer* -MesaSoftwareRast::_NewRenderBuffer(bool front) -{ - CALLED(); - struct swrast_renderbuffer *swRenderBuffer - = (struct swrast_renderbuffer*)calloc(1, sizeof *swRenderBuffer); - - if (!swRenderBuffer) { - ERROR("%s: Failed calloc RenderBuffer\n", __func__); - return NULL; - } - - _mesa_init_renderbuffer(&swRenderBuffer->Base, 0); - - swRenderBuffer->Base.ClassID = HAIKU_SWRAST_RENDERBUFFER_CLASS; - swRenderBuffer->Base.RefCount = 1; - swRenderBuffer->Base.Delete = _RenderBufferDelete; - - if (!front) - swRenderBuffer->Base.AllocStorage = _RenderBufferStorageMalloc; - else - swRenderBuffer->Base.AllocStorage = _RenderBufferStorage; - - if (_SetupRenderBuffer(&swRenderBuffer->Base, fColorSpace) != B_OK) { - free(swRenderBuffer); - return NULL; - } - - return swRenderBuffer; -} - - -status_t -MesaSoftwareRast::_SetupRenderBuffer(struct gl_renderbuffer* rb, - color_space colorSpace) -{ - CALLED(); - - rb->InternalFormat = GL_RGBA; - - switch (colorSpace) { - case B_RGBA32: - rb->_BaseFormat = GL_RGBA; - rb->Format = MESA_FORMAT_B8G8R8A8_UNORM; - break; - case B_RGB32: - rb->_BaseFormat = GL_RGB; - rb->Format = MESA_FORMAT_B8G8R8X8_UNORM; - break; - case B_RGB24: - rb->_BaseFormat = GL_RGB; - rb->Format = MESA_FORMAT_BGR_UNORM8; - break; - case B_RGB16: - rb->_BaseFormat = GL_RGB; - rb->Format = MESA_FORMAT_B5G6R5_UNORM; - break; - case B_RGB15: - rb->_BaseFormat = GL_RGB; - rb->Format = MESA_FORMAT_B5G5R5A1_UNORM; - break; - default: - fprintf(stderr, "Unsupported screen color space %s\n", - color_space_name(fColorSpace)); - debugger("Unsupported OpenGL color space"); - return B_ERROR; - } - return B_OK; -} - - -/*! Y inverted Map RenderBuffer function - We use a BBitmap for storage which has Y inverted. - If the Mesa provided Map function ever allows external - control of this we can omit this function. 
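The Y-inverted mapping described in that comment is a common pattern when a bottom-up bitmap has to be exposed through a top-down row interface: hand the caller a pointer to the flipped row and a negative row stride. A minimal standalone sketch of the addressing, assuming a simple bottom-up byte buffer (the function name and the buffer/width/height/bpp parameters are illustrative, not Mesa API):

static void
map_bottom_up(unsigned char *buffer, unsigned width, unsigned height,
              unsigned bpp, unsigned x, unsigned y,
              unsigned char **map_out, int *stride_out)
{
   /* Bytes per row of the underlying bottom-up storage. */
   const int row_stride = (int)(width * bpp);

   /* Flip the requested window-space row into storage space. */
   const unsigned flipped_y = height - y - 1;

   /* Point at the requested pixel and report a negative stride, so the
    * caller walks upward in memory and sees rows in top-down order. */
   *map_out = buffer + flipped_y * row_stride + x * bpp;
   *stride_out = -row_stride;
}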
-*/ -void -MesaSoftwareRast::_RenderBufferMap(gl_context *ctx, - struct gl_renderbuffer *rb, GLuint x, GLuint y, GLuint w, GLuint h, - GLbitfield mode, GLubyte **mapOut, GLint *rowStrideOut) -{ - if (rb->ClassID == HAIKU_SWRAST_RENDERBUFFER_CLASS) { - struct swrast_renderbuffer *srb = swrast_renderbuffer(rb); - const GLuint bpp = _mesa_get_format_bytes(rb->Format); - GLint rowStride = rb->Width * bpp; // in Bytes - - y = rb->Height - y - 1; - - *rowStrideOut = -rowStride; - *mapOut = (GLubyte *) srb->Buffer + y * rowStride + x * bpp; - } else { - _swrast_map_soft_renderbuffer(ctx, rb, x, y, w, h, mode, - mapOut, rowStrideOut); - } -} - - -void -MesaSoftwareRast::_RenderBufferDelete(struct gl_context *ctx, - struct gl_renderbuffer* rb) -{ - CALLED(); - if (rb != NULL) { - struct swrast_renderbuffer *swRenderBuffer - = swrast_renderbuffer(rb); - if (swRenderBuffer != NULL) - free(swRenderBuffer->Buffer); - } - free(rb); -} - - -void -MesaSoftwareRast::_CopyToDirect() -{ - BAutolock lock(fInfoLocker); - - // check the bitmap size still matches the size - if (fInfo->window_bounds.bottom - fInfo->window_bounds.top - != fBitmap->Bounds().IntegerHeight() - || fInfo->window_bounds.right - fInfo->window_bounds.left - != fBitmap->Bounds().IntegerWidth()) - return; - - uint8 bytesPerPixel = fInfo->bits_per_pixel / 8; - uint32 bytesPerRow = fBitmap->BytesPerRow(); - for (uint32 i = 0; i < fInfo->clip_list_count; i++) { - clipping_rect *clip = &fInfo->clip_list[i]; - int32 height = clip->bottom - clip->top + 1; - int32 bytesWidth - = (clip->right - clip->left + 1) * bytesPerPixel; - uint8* p = (uint8*)fInfo->bits + clip->top - * fInfo->bytes_per_row + clip->left * bytesPerPixel; - uint8* b = (uint8*)fBitmap->Bits() - + (clip->top - fInfo->window_bounds.top) * bytesPerRow - + (clip->left - fInfo->window_bounds.left) - * bytesPerPixel; - - for (int y = 0; y < height; y++) { - memcpy(p, b, bytesWidth); - p += fInfo->bytes_per_row; - b += bytesPerRow; - } - } -} diff --git a/src/mesa/drivers/haiku/swrast/SoftwareRast.h b/src/mesa/drivers/haiku/swrast/SoftwareRast.h deleted file mode 100644 index 8f0f0184863..00000000000 --- a/src/mesa/drivers/haiku/swrast/SoftwareRast.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright 2006-2012, Haiku, Inc. All rights reserved. - * Distributed under the terms of the MIT License. 
- * - * Authors: - * Jérôme Duval, [email protected] - * Philippe Houdoin, [email protected] - * Artur Wyszynski, [email protected] - */ -#ifndef MESASOFTWARERENDERER_H -#define MESASOFTWARERENDERER_H - - -#define HAIKU_SWRAST_RENDERBUFFER_CLASS 0x737752 // swR - - -#include "GLRenderer.h" - -extern "C" { -#include "context.h" -#include "main/version.h" -#include "swrast/s_chan.h" -#include "swrast/s_context.h" -} - - -class MesaSoftwareRast : public BGLRenderer, public gl_context { -public: - MesaSoftwareRast(BGLView* view, - ulong bgl_options, - BGLDispatcher* dispatcher); - virtual ~MesaSoftwareRast(); - - virtual void LockGL(); - virtual void UnlockGL(); - - virtual void SwapBuffers(bool VSync = false); - virtual void Draw(BRect updateRect); - virtual status_t CopyPixelsOut(BPoint source, BBitmap* dest); - virtual status_t CopyPixelsIn(BBitmap* source, BPoint dest); - virtual void FrameResized(float width, float height); - - virtual void EnableDirectMode(bool enabled); - virtual void DirectConnected(direct_buffer_info* info); - -private: - static const GLubyte* _GetString(gl_context* ctx, GLenum name); - void _CheckResize(GLuint newWidth, GLuint newHeight); - static void _UpdateState(gl_context* ctx, GLuint newState); - static void _Flush(gl_context *ctx); - - struct swrast_renderbuffer* _NewRenderBuffer(bool front); - status_t _SetupRenderBuffer(struct gl_renderbuffer* rb, - color_space colorSpace); - -/* Mesa callbacks */ - static void _RenderBufferDelete(struct gl_context *ctx, - struct gl_renderbuffer* rb); - static GLboolean _RenderBufferStorage(gl_context* ctx, - struct gl_renderbuffer* render, - GLenum internalFormat, - GLuint width, GLuint height); - static GLboolean _RenderBufferStorageMalloc(gl_context* ctx, - struct gl_renderbuffer* render, - GLenum internalFormat, - GLuint width, GLuint height); - static void _RenderBufferMap(gl_context *ctx, - struct gl_renderbuffer *rb, - GLuint x, GLuint y, GLuint w, GLuint h, - GLbitfield mode, GLubyte **mapOut, - GLint *rowStrideOut); - - void _AllocateBitmap(); - void _CopyToDirect(); - - BBitmap* fBitmap; - bool fDirectModeEnabled; - direct_buffer_info* fInfo; - BLocker fInfoLocker; - ulong fOptions; - - gl_config* fVisual; - - struct gl_framebuffer* fFrameBuffer; - struct swrast_renderbuffer* fFrontRenderBuffer; - struct swrast_renderbuffer* fBackRenderBuffer; - - GLuint fWidth; - GLuint fHeight; - color_space fColorSpace; - - void* fRowAddr[SWRAST_MAX_HEIGHT]; -}; - -#endif // MESASOFTWARERENDERER_H diff --git a/src/mesa/drivers/haiku/swrast/SoftwareRast.rdef b/src/mesa/drivers/haiku/swrast/SoftwareRast.rdef deleted file mode 100644 index cb60332100c..00000000000 --- a/src/mesa/drivers/haiku/swrast/SoftwareRast.rdef +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2012, Haiku, Inc. All rights reserved. - * Distributed under the terms of the MIT License. 
- */ - -resource app_signature "application/x-vnd.Haiku-swrast"; - -resource app_version { - major = 9, - middle = 0, - minor = 0, - variety = 0, - internal = 0, - short_info = "Software Rasterizer", - long_info = "Haiku Mesa Software GL Rasterizer" -}; - -resource vector_icon { - $"6E6369660A0200140294A9FF18020014028DFFFF97058C0500020006023B10B7" - $"37F036BA1A993D466848C719BEBE2000919292FFD5D5D5020016023900000000" - $"000000003EE0004AE00048E0005EF884C702000203392E8D383001BAD97F3C12" - $"8B4786BD48B8AD0D97BBFFFF7B4168DBE9FF4168DB97020002023A0C1238D099" - $"BE44203F4BD14B38844678240DF56A7D9FE1EA064CC704016B0500090A044024" - $"2438404C5C380A044028243C40505C3C0A042438243B5C3C5C380608BFBE4D59" - $"4D59515957575659585560406044603C5E3A5C3CCB4FBFBA5E3ECA9DC11F564B" - $"584A544C504C0606AF0F2F3D2F3D393D4034BF593542324130432F42364432C0" - $"3FBC5A2F48354A2F480608AE9A22303EB5BD3AB42542B755422E412F3C29322D" - $"32223C0204263726372538263F253E263F304430443143303C313D303C02043D" - $"423D423C433D4A3C493D4A495049504A4F49474A484947060DAEAAAE014E445A" - $"3456365E325E3D5D3F5A3A5542544E4D573A4E364439463342324A2242310A0A" - $"0002020102403CA00C88888C8CC1401673C40D6544F2950A01010002403CA000" - $"0000000000401673C40D65446CF80A08020304023EC16A0000000000003EC16A" - $"45DD1844C6550A030105123EC16A0000000000003EC16A45DD1844C655011784" - $"22040A040105023EC16A0000000000003EC16A45DD1844C6550A030108123EC1" - $"6A0000000000003EC16A45DD1844C65501178422040A0503080706023EC16A00" - $"00000000003EC16A45DD1844C6550A030206071A3EC16A0000000000003EC16A" - $"45DD1844C65510FF0215810004178222040A060106023EC16A0000000000003E" - $"C16A45DD1844C6550A070107023EC16A0000000000003EC16A45DD1844C655" -}; diff --git a/src/mesa/drivers/osmesa/Makefile.am b/src/mesa/drivers/osmesa/Makefile.am index 9a388d64cd5..46332e16bd1 100644 --- a/src/mesa/drivers/osmesa/Makefile.am +++ b/src/mesa/drivers/osmesa/Makefile.am @@ -39,7 +39,6 @@ nodist_EXTRA_lib@OSMESA_LIB@_la_SOURCES = dummy.cpp lib@OSMESA_LIB@_la_SOURCES = osmesa.c lib@OSMESA_LIB@_la_LDFLAGS = \ - -module \ -no-undefined \ -version-number @OSMESA_VERSION@ \ $(GC_SECTIONS) \ diff --git a/src/mesa/drivers/x11/Makefile.am b/src/mesa/drivers/x11/Makefile.am index c0596f8119e..ba79f6981b9 100644 --- a/src/mesa/drivers/x11/Makefile.am +++ b/src/mesa/drivers/x11/Makefile.am @@ -25,6 +25,11 @@ EXTRA_DIST = SConscript +if HAVE_SHARED_GLAPI +SHARED_GLAPI_CFLAGS = -DGLX_SHARED_GLAPI +SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la +endif + AM_CPPFLAGS = \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src/mapi \ @@ -34,11 +39,10 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/mesa/main \ $(X11_INCLUDES) \ + $(SHARED_GLAPI_CFLAGS) \ $(DEFINES) -if HAVE_X11_DRIVER lib_LTLIBRARIES = lib@[email protected] -endif lib@GL_LIB@_la_SOURCES = \ glxapi.h \ @@ -66,6 +70,7 @@ GL_PATCH = 0 lib@GL_LIB@_la_LIBADD = \ $(top_builddir)/src/mesa/libmesa.la \ $(top_builddir)/src/mapi/glapi/libglapi.la \ + $(SHARED_GLAPI_LIB) \ $(GL_LIB_DEPS) lib@GL_LIB@_la_LDFLAGS = \ diff --git a/src/mesa/main/api_exec.h b/src/mesa/main/api_exec.h index 12249fec228..655cb32d0a4 100644 --- a/src/mesa/main/api_exec.h +++ b/src/mesa/main/api_exec.h @@ -38,6 +38,9 @@ _mesa_initialize_exec_table(struct gl_context *ctx); extern void _mesa_initialize_dispatch_tables(struct gl_context *ctx); +extern struct _glapi_table * +_mesa_new_nop_table(unsigned numEntries); + #ifdef __cplusplus } // extern "C" #endif diff --git a/src/mesa/main/api_loopback.c b/src/mesa/main/api_loopback.c index 
9932a837336..a7fd82c531f 100644 --- a/src/mesa/main/api_loopback.c +++ b/src/mesa/main/api_loopback.c @@ -1772,7 +1772,9 @@ _mesa_loopback_init_api_table(const struct gl_context *ctx, SET_VertexAttribI4sv(dest, _mesa_VertexAttribI4sv); SET_VertexAttribI4ubv(dest, _mesa_VertexAttribI4ubv); SET_VertexAttribI4usv(dest, _mesa_VertexAttribI4usv); + } + if (ctx->API == API_OPENGL_CORE) { /* GL 4.1 / GL_ARB_vertex_attrib_64bit */ SET_VertexAttribL1d(dest, _mesa_VertexAttribL1d); SET_VertexAttribL2d(dest, _mesa_VertexAttribL2d); diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c index b163c0aa699..53626e38be9 100644 --- a/src/mesa/main/attrib.c +++ b/src/mesa/main/attrib.c @@ -177,6 +177,10 @@ struct texture_state }; +/** An unused GL_*_BIT value */ +#define DUMMY_BIT 0x10000000 + + /** * Allocate new attribute node of given type/kind. Attach payload data. * Insert it into the linked list named by 'head'. @@ -253,6 +257,15 @@ _mesa_PushAttrib(GLbitfield mask) /* groups specified by the mask. */ head = NULL; + if (mask == 0) { + /* if mask is zero we still need to push something so that we + * don't get a GL_STACK_UNDERFLOW error in glPopAttrib(). + */ + GLuint dummy = 0; + if (!push_attrib(ctx, &head, DUMMY_BIT, sizeof(dummy), &dummy)) + goto end; + } + if (mask & GL_ACCUM_BUFFER_BIT) { if (!push_attrib(ctx, &head, GL_ACCUM_BUFFER_BIT, sizeof(struct gl_accum_attrib), @@ -928,6 +941,10 @@ _mesa_PopAttrib(void) } switch (attr->kind) { + case DUMMY_BIT: + /* do nothing */ + break; + case GL_ACCUM_BUFFER_BIT: { const struct gl_accum_attrib *accum; @@ -1074,6 +1091,11 @@ _mesa_PopAttrib(void) _mesa_ClearDepth(depth->Clear); _mesa_set_enable(ctx, GL_DEPTH_TEST, depth->Test); _mesa_DepthMask(depth->Mask); + if (ctx->Extensions.EXT_depth_bounds_test) { + _mesa_set_enable(ctx, GL_DEPTH_BOUNDS_TEST_EXT, + depth->BoundsTest); + _mesa_DepthBoundsEXT(depth->BoundsMin, depth->BoundsMax); + } } break; case GL_ENABLE_BIT: diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c index 774fc888ec4..d869fa2aa09 100644 --- a/src/mesa/main/blend.c +++ b/src/mesa/main/blend.c @@ -769,7 +769,7 @@ _mesa_ClampColor(GLenum target, GLenum clamp) } FLUSH_VERTICES(ctx, _NEW_LIGHT); ctx->Light.ClampVertexColor = clamp; - _mesa_update_clamp_vertex_color(ctx); + _mesa_update_clamp_vertex_color(ctx, ctx->DrawBuffer); break; case GL_CLAMP_FRAGMENT_COLOR_ARB: if (ctx->API == API_OPENGL_CORE && @@ -778,7 +778,7 @@ _mesa_ClampColor(GLenum target, GLenum clamp) } FLUSH_VERTICES(ctx, _NEW_FRAG_CLAMP); ctx->Color.ClampFragmentColor = clamp; - _mesa_update_clamp_fragment_color(ctx); + _mesa_update_clamp_fragment_color(ctx, ctx->DrawBuffer); break; case GL_CLAMP_READ_COLOR_ARB: ctx->Color.ClampReadColor = clamp; @@ -807,50 +807,55 @@ get_clamp_color(const struct gl_framebuffer *fb, GLenum clamp) } GLboolean -_mesa_get_clamp_fragment_color(const struct gl_context *ctx) +_mesa_get_clamp_fragment_color(const struct gl_context *ctx, + const struct gl_framebuffer *drawFb) { - return get_clamp_color(ctx->DrawBuffer, - ctx->Color.ClampFragmentColor); + return get_clamp_color(drawFb, ctx->Color.ClampFragmentColor); } GLboolean -_mesa_get_clamp_vertex_color(const struct gl_context *ctx) +_mesa_get_clamp_vertex_color(const struct gl_context *ctx, + const struct gl_framebuffer *drawFb) { - return get_clamp_color(ctx->DrawBuffer, ctx->Light.ClampVertexColor); + return get_clamp_color(drawFb, ctx->Light.ClampVertexColor); } GLboolean -_mesa_get_clamp_read_color(const struct gl_context *ctx) +_mesa_get_clamp_read_color(const struct 
gl_context *ctx, + const struct gl_framebuffer *readFb) { - return get_clamp_color(ctx->ReadBuffer, ctx->Color.ClampReadColor); + return get_clamp_color(readFb, ctx->Color.ClampReadColor); } /** * Update the ctx->Color._ClampFragmentColor field */ void -_mesa_update_clamp_fragment_color(struct gl_context *ctx) +_mesa_update_clamp_fragment_color(struct gl_context *ctx, + const struct gl_framebuffer *drawFb) { - struct gl_framebuffer *fb = ctx->DrawBuffer; - /* Don't clamp if: * - there is no colorbuffer * - all colorbuffers are unsigned normalized, so clamping has no effect * - there is an integer colorbuffer */ - if (!fb || !fb->_HasSNormOrFloatColorBuffer || fb->_IntegerColor) + if (!drawFb || !drawFb->_HasSNormOrFloatColorBuffer || + drawFb->_IntegerColor) ctx->Color._ClampFragmentColor = GL_FALSE; else - ctx->Color._ClampFragmentColor = _mesa_get_clamp_fragment_color(ctx); + ctx->Color._ClampFragmentColor = + _mesa_get_clamp_fragment_color(ctx, drawFb); } /** * Update the ctx->Color._ClampVertexColor field */ void -_mesa_update_clamp_vertex_color(struct gl_context *ctx) +_mesa_update_clamp_vertex_color(struct gl_context *ctx, + const struct gl_framebuffer *drawFb) { - ctx->Light._ClampVertexColor = _mesa_get_clamp_vertex_color(ctx); + ctx->Light._ClampVertexColor = + _mesa_get_clamp_vertex_color(ctx, drawFb); } /** diff --git a/src/mesa/main/blend.h b/src/mesa/main/blend.h index fe31a7440f0..8ab9e02fc13 100644 --- a/src/mesa/main/blend.h +++ b/src/mesa/main/blend.h @@ -37,6 +37,7 @@ #include "formats.h" struct gl_context; +struct gl_framebuffer; extern void GLAPIENTRY @@ -101,19 +102,24 @@ extern void GLAPIENTRY _mesa_ClampColor(GLenum target, GLenum clamp); extern GLboolean -_mesa_get_clamp_fragment_color(const struct gl_context *ctx); +_mesa_get_clamp_fragment_color(const struct gl_context *ctx, + const struct gl_framebuffer *drawFb); extern GLboolean -_mesa_get_clamp_vertex_color(const struct gl_context *ctx); +_mesa_get_clamp_vertex_color(const struct gl_context *ctx, + const struct gl_framebuffer *drawFb); extern GLboolean -_mesa_get_clamp_read_color(const struct gl_context *ctx); +_mesa_get_clamp_read_color(const struct gl_context *ctx, + const struct gl_framebuffer *readFb); extern void -_mesa_update_clamp_fragment_color(struct gl_context *ctx); +_mesa_update_clamp_fragment_color(struct gl_context *ctx, + const struct gl_framebuffer *drawFb); extern void -_mesa_update_clamp_vertex_color(struct gl_context *ctx); +_mesa_update_clamp_vertex_color(struct gl_context *ctx, + const struct gl_framebuffer *drawFb); extern mesa_format _mesa_get_render_format(const struct gl_context *ctx, mesa_format format); diff --git a/src/mesa/main/blit.c b/src/mesa/main/blit.c index 0694466eb75..db8fee5a414 100644 --- a/src/mesa/main/blit.c +++ b/src/mesa/main/blit.c @@ -34,6 +34,7 @@ #include "enums.h" #include "blit.h" #include "fbobject.h" +#include "framebuffer.h" #include "glformats.h" #include "mtypes.h" #include "state.h" @@ -148,38 +149,25 @@ is_valid_blit_filter(const struct gl_context *ctx, GLenum filter) } -/** - * Blit rectangular region, optionally from one framebuffer to another. - * - * Note, if the src buffer is multisampled and the dest is not, this is - * when the samples must be resolved to a single color. 
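The comment above ("if the src buffer is multisampled and the dest is not...") describes the multisample-resolve path of glBlitFramebuffer. A typical application-side use, sketched with core GL calls; msaa_fbo, resolve_fbo, width and height are placeholder names, and the two rectangles match because the checks further down reject scaled multisample blits:

/* Resolve an MSAA color attachment into a single-sample FBO.
 * Source and destination rectangles are identical; GL_NEAREST is always
 * legal and is required whenever depth or stencil bits are included. */
glBindFramebuffer(GL_READ_FRAMEBUFFER, msaa_fbo);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, resolve_fbo);
glBlitFramebuffer(0, 0, width, height,
                  0, 0, width, height,
                  GL_COLOR_BUFFER_BIT, GL_NEAREST);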
- */ -void GLAPIENTRY -_mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, - GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, - GLbitfield mask, GLenum filter) +void +_mesa_blit_framebuffer(struct gl_context *ctx, + struct gl_framebuffer *readFb, + struct gl_framebuffer *drawFb, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter, const char *func) { const GLbitfield legalMaskBits = (GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); - const struct gl_framebuffer *readFb, *drawFb; - GET_CURRENT_CONTEXT(ctx); FLUSH_VERTICES(ctx, 0); - if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, - "glBlitFramebuffer(%d, %d, %d, %d, %d, %d, %d, %d, 0x%x, %s)\n", - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - mask, _mesa_lookup_enum_by_nr(filter)); - - if (ctx->NewState) { - _mesa_update_state(ctx); - } + /* Update completeness status of readFb and drawFb. */ + _mesa_update_framebuffer(ctx, readFb, drawFb); - readFb = ctx->ReadBuffer; - drawFb = ctx->DrawBuffer; + /* Make sure drawFb has an initialized bounding box. */ + _mesa_update_draw_buffer_bounds(ctx, drawFb); if (!readFb || !drawFb) { /* This will normally never happen but someday we may want to @@ -192,12 +180,12 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, if (drawFb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT || readFb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) { _mesa_error(ctx, GL_INVALID_FRAMEBUFFER_OPERATION_EXT, - "glBlitFramebufferEXT(incomplete draw/read buffers)"); + "%s(incomplete draw/read buffers)", func); return; } if (!is_valid_blit_filter(ctx, filter)) { - _mesa_error(ctx, GL_INVALID_ENUM, "glBlitFramebufferEXT(%s)", + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid filter %s)", func, _mesa_lookup_enum_by_nr(filter)); return; } @@ -205,13 +193,13 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, if ((filter == GL_SCALED_RESOLVE_FASTEST_EXT || filter == GL_SCALED_RESOLVE_NICEST_EXT) && (readFb->Visual.samples == 0 || drawFb->Visual.samples > 0)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glBlitFramebufferEXT(%s)", + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(%s: invalid samples)", func, _mesa_lookup_enum_by_nr(filter)); return; } if (mask & ~legalMaskBits) { - _mesa_error( ctx, GL_INVALID_VALUE, "glBlitFramebufferEXT(mask)"); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(invalid mask bits set)", func); return; } @@ -219,13 +207,13 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, if ((mask & (GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT)) && filter != GL_NEAREST) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebufferEXT(depth/stencil requires GL_NEAREST filter)"); + "%s(depth/stencil requires GL_NEAREST filter)", func); return; } /* get color read/draw renderbuffers */ if (mask & GL_COLOR_BUFFER_BIT) { - const GLuint numColorDrawBuffers = ctx->DrawBuffer->_NumColorDrawBuffers; + const GLuint numColorDrawBuffers = drawFb->_NumColorDrawBuffers; const struct gl_renderbuffer *colorReadRb = readFb->_ColorReadBuffer; const struct gl_renderbuffer *colorDrawRb = NULL; GLuint i; @@ -241,7 +229,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, } else { for (i = 0; i < numColorDrawBuffers; i++) { - colorDrawRb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + colorDrawRb = drawFb->_ColorDrawBuffers[i]; if (!colorDrawRb) continue; @@ -257,15 +245,15 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, 
GLint srcX1, GLint srcY1, */ if (_mesa_is_gles3(ctx) && (colorDrawRb == colorReadRb)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebuffer(source and destination color " - "buffer cannot be the same)"); + "%s(source and destination color " + "buffer cannot be the same)", func); return; } if (!compatible_color_datatypes(colorReadRb->Format, colorDrawRb->Format)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebufferEXT(color buffer datatypes mismatch)"); + "%s(color buffer datatypes mismatch)", func); return; } /* extra checks for multisample copies... */ @@ -273,7 +261,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, /* color formats must match */ if (!compatible_resolve_formats(colorReadRb, colorDrawRb)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebufferEXT(bad src/dst multisample pixel formats)"); + "%s(bad src/dst multisample pixel formats)", func); return; } } @@ -286,7 +274,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLenum type = _mesa_get_format_datatype(colorReadRb->Format); if (type == GL_INT || type == GL_UNSIGNED_INT) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebufferEXT(integer color type)"); + "%s(integer color type)", func); return; } } @@ -306,15 +294,15 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, * ignored." */ if ((readRb == NULL) || (drawRb == NULL)) { - mask &= ~GL_STENCIL_BUFFER_BIT; + mask &= ~GL_STENCIL_BUFFER_BIT; } else { int read_z_bits, draw_z_bits; if (_mesa_is_gles3(ctx) && (drawRb == readRb)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebuffer(source and destination stencil " - "buffer cannot be the same)"); + "%s(source and destination stencil " + "buffer cannot be the same)", func); return; } @@ -324,7 +312,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, * there is only one: GL_UNSIGNED_INT. */ _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebuffer(stencil attachment format mismatch)"); + "%s(stencil attachment format mismatch)", func); return; } @@ -340,8 +328,8 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, _mesa_get_format_datatype(readRb->Format) != _mesa_get_format_datatype(drawRb->Format))) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glBlitFramebuffer" - "(stencil attachment depth format mismatch)"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(stencil attachment depth format mismatch)", func); return; } } @@ -360,15 +348,15 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, * ignored." */ if ((readRb == NULL) || (drawRb == NULL)) { - mask &= ~GL_DEPTH_BUFFER_BIT; + mask &= ~GL_DEPTH_BUFFER_BIT; } else { int read_s_bit, draw_s_bit; if (_mesa_is_gles3(ctx) && (drawRb == readRb)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebuffer(source and destination depth " - "buffer cannot be the same)"); + "%s(source and destination depth " + "buffer cannot be the same)", func); return; } @@ -377,7 +365,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, (_mesa_get_format_datatype(readRb->Format) != _mesa_get_format_datatype(drawRb->Format))) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebuffer(depth attachment format mismatch)"); + "%s(depth attachment format mismatch)", func); return; } @@ -389,8 +377,8 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, * we should ignore the stencil format check. 
*/ if (read_s_bit > 0 && draw_s_bit > 0 && read_s_bit != draw_s_bit) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glBlitFramebuffer" - "(depth attachment stencil bits mismatch)"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(depth attachment stencil bits mismatch)", func); return; } } @@ -406,7 +394,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, */ if (drawFb->Visual.samples > 0) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebuffer(destination samples must be 0)"); + "%s(destination samples must be 0)", func); return; } @@ -426,7 +414,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, && (srcX0 != dstX0 || srcY0 != dstY0 || srcX1 != dstX1 || srcY1 != dstY1)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebuffer(bad src/dst multisample region)"); + "%s(bad src/dst multisample region)", func); return; } } else { @@ -434,7 +422,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, drawFb->Visual.samples > 0 && readFb->Visual.samples != drawFb->Visual.samples) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebufferEXT(mismatched samples)"); + "%s(mismatched samples)", func); return; } @@ -445,7 +433,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, if (abs(srcX1 - srcX0) != abs(dstX1 - dstX0) || abs(srcY1 - srcY0) != abs(dstY1 - dstY0)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBlitFramebufferEXT(bad src/dst multisample region sizes)"); + "%s(bad src/dst multisample region sizes)", func); return; } } @@ -457,43 +445,44 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, const struct gl_renderbuffer *colorDrawRb = NULL; GLuint i = 0; - printf("glBlitFramebuffer(%d, %d, %d, %d, %d, %d, %d, %d," - " 0x%x, 0x%x)\n", - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - mask, filter); + printf("%s(%d, %d, %d, %d, %d, %d, %d, %d," + " 0x%x, 0x%x)\n", func, + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter); + if (colorReadRb) { const struct gl_renderbuffer_attachment *att; att = find_attachment(readFb, colorReadRb); printf(" Src FBO %u RB %u (%dx%d) ", - readFb->Name, colorReadRb->Name, - colorReadRb->Width, colorReadRb->Height); + readFb->Name, colorReadRb->Name, + colorReadRb->Width, colorReadRb->Height); if (att && att->Texture) { printf("Tex %u tgt 0x%x level %u face %u", - att->Texture->Name, - att->Texture->Target, - att->TextureLevel, - att->CubeMapFace); + att->Texture->Name, + att->Texture->Target, + att->TextureLevel, + att->CubeMapFace); } printf("\n"); /* Print all active color render buffers */ - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - colorDrawRb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + for (i = 0; i < drawFb->_NumColorDrawBuffers; i++) { + colorDrawRb = drawFb->_ColorDrawBuffers[i]; if (!colorDrawRb) continue; att = find_attachment(drawFb, colorDrawRb); printf(" Dst FBO %u RB %u (%dx%d) ", - drawFb->Name, colorDrawRb->Name, - colorDrawRb->Width, colorDrawRb->Height); + drawFb->Name, colorDrawRb->Name, + colorDrawRb->Width, colorDrawRb->Height); if (att && att->Texture) { printf("Tex %u tgt 0x%x level %u face %u", - att->Texture->Name, - att->Texture->Target, - att->TextureLevel, - att->CubeMapFace); + att->Texture->Name, + att->Texture->Target, + att->TextureLevel, + att->CubeMapFace); } printf("\n"); } @@ -507,8 +496,87 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, } assert(ctx->Driver.BlitFramebuffer); - 
ctx->Driver.BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer, + ctx->Driver.BlitFramebuffer(ctx, readFb, drawFb, srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, mask, filter); } + + +/** + * Blit rectangular region, optionally from one framebuffer to another. + * + * Note, if the src buffer is multisampled and the dest is not, this is + * when the samples must be resolved to a single color. + */ +void GLAPIENTRY +_mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, + "glBlitFramebuffer(%d, %d, %d, %d, " + " %d, %d, %d, %d, 0x%x, %s)\n", + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, _mesa_lookup_enum_by_nr(filter)); + + _mesa_blit_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer, + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter, "glBlitFramebuffer"); +} + + +void GLAPIENTRY +_mesa_BlitNamedFramebuffer(GLuint readFramebuffer, GLuint drawFramebuffer, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *readFb, *drawFb; + + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, + "glBlitNamedFramebuffer(%u %u %d, %d, %d, %d, " + " %d, %d, %d, %d, 0x%x, %s)\n", + readFramebuffer, drawFramebuffer, + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, _mesa_lookup_enum_by_nr(filter)); + + /* + * According to PDF page 533 of the OpenGL 4.5 core spec (30.10.2014, + * Section 18.3 Copying Pixels): + * "... if readFramebuffer or drawFramebuffer is zero (for + * BlitNamedFramebuffer), then the default read or draw framebuffer is + * used as the corresponding source or destination framebuffer, + * respectively." 
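The spec language quoted above is what the lookup below implements: a framebuffer name of zero bypasses _mesa_lookup_framebuffer_err and falls back to the window-system framebuffer. From the application side, the same resolve as before can therefore target the default framebuffer directly through the GL 4.5 / ARB_direct_state_access entry point (msaa_fbo, width and height are again placeholders):

/* No binding required: read from msaa_fbo, draw to the default
 * framebuffer (name 0), resolving the samples in the process. */
glBlitNamedFramebuffer(msaa_fbo, 0,
                       0, 0, width, height,
                       0, 0, width, height,
                       GL_COLOR_BUFFER_BIT, GL_NEAREST);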
+ */ + if (readFramebuffer) { + readFb = _mesa_lookup_framebuffer_err(ctx, readFramebuffer, + "glBlitNamedFramebuffer"); + if (!readFb) + return; + } + else + readFb = ctx->WinSysReadBuffer; + + if (drawFramebuffer) { + drawFb = _mesa_lookup_framebuffer_err(ctx, drawFramebuffer, + "glBlitNamedFramebuffer"); + if (!drawFb) + return; + } + else + drawFb = ctx->WinSysDrawBuffer; + + _mesa_blit_framebuffer(ctx, readFb, drawFb, + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter, "glBlitNamedFramebuffer"); +} diff --git a/src/mesa/main/blit.h b/src/mesa/main/blit.h index 01a958af5a2..54b946e3192 100644 --- a/src/mesa/main/blit.h +++ b/src/mesa/main/blit.h @@ -28,11 +28,24 @@ #include "glheader.h" +extern void +_mesa_blit_framebuffer(struct gl_context *ctx, + struct gl_framebuffer *readFb, + struct gl_framebuffer *drawFb, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter, const char *func); extern void GLAPIENTRY _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +extern void GLAPIENTRY +_mesa_BlitNamedFramebuffer(GLuint readFramebuffer, GLuint drawFramebuffer, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter); + #endif /* BLIT_H */ diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 37a9790923b..0536266d756 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -242,16 +242,16 @@ read_buffer_enum_to_index(GLenum buffer) * * See the GL_EXT_framebuffer_object spec for more info. */ -void GLAPIENTRY -_mesa_DrawBuffer(GLenum buffer) +void +_mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, const char *caller) { GLbitfield destMask; - GET_CURRENT_CONTEXT(ctx); FLUSH_VERTICES(ctx, 0); if (MESA_VERBOSE & VERBOSE_API) { - _mesa_debug(ctx, "glDrawBuffer %s\n", _mesa_lookup_enum_by_nr(buffer)); + _mesa_debug(ctx, "%s %s\n", caller, _mesa_lookup_enum_by_nr(buffer)); } if (buffer == GL_NONE) { @@ -259,33 +259,60 @@ _mesa_DrawBuffer(GLenum buffer) } else { const GLbitfield supportedMask - = supported_buffer_bitmask(ctx, ctx->DrawBuffer); + = supported_buffer_bitmask(ctx, fb); destMask = draw_buffer_enum_to_bitmask(ctx, buffer); if (destMask == BAD_MASK) { /* totally bogus buffer */ - _mesa_error(ctx, GL_INVALID_ENUM, - "glDrawBuffer(buffer=0x%x)", buffer); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", caller, + _mesa_lookup_enum_by_nr(buffer)); return; } destMask &= supportedMask; if (destMask == 0x0) { /* none of the named color buffers exist! */ - _mesa_error(ctx, GL_INVALID_OPERATION, - "glDrawBuffer(buffer=0x%x)", buffer); + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid buffer %s)", + caller, _mesa_lookup_enum_by_nr(buffer)); return; } } /* if we get here, there's no error so set new state */ - _mesa_drawbuffers(ctx, 1, &buffer, &destMask); + _mesa_drawbuffers(ctx, fb, 1, &buffer, &destMask); + + /* Call device driver function only if fb is the bound draw buffer */ + if (fb == ctx->DrawBuffer) { + if (ctx->Driver.DrawBuffers) + ctx->Driver.DrawBuffers(ctx, 1, &buffer); + else if (ctx->Driver.DrawBuffer) + ctx->Driver.DrawBuffer(ctx, buffer); + } +} - /* - * Call device driver function. 
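The hunk just above carries the key behavioural point of the buffers.c refactor: _mesa_draw_buffer now records the draw-buffer selection on whichever gl_framebuffer it is handed, but only calls back into the driver when that framebuffer is the currently bound draw buffer. For the DSA wrapper that follows, the effect is roughly this (fbo is a placeholder name):

/* Records GL_COLOR_ATTACHMENT0 as fbo's draw buffer even while fbo is
 * unbound; the ctx->Driver.DrawBuffer(s) callback is skipped because of
 * the fb == ctx->DrawBuffer check above. */
glNamedFramebufferDrawBuffer(fbo, GL_COLOR_ATTACHMENT0);

/* Normal state validation picks the recorded selection up once the
 * framebuffer actually becomes the draw buffer. */
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);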
- */ - if (ctx->Driver.DrawBuffers) - ctx->Driver.DrawBuffers(ctx, 1, &buffer); - else if (ctx->Driver.DrawBuffer) - ctx->Driver.DrawBuffer(ctx, buffer); + +void GLAPIENTRY +_mesa_DrawBuffer(GLenum buffer) +{ + GET_CURRENT_CONTEXT(ctx); + _mesa_draw_buffer(ctx, ctx->DrawBuffer, buffer, "glDrawBuffer"); +} + + +void GLAPIENTRY +_mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + + if (framebuffer) { + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glNamedFramebufferDrawBuffer"); + if (!fb) + return; + } + else + fb = ctx->WinSysDrawBuffer; + + _mesa_draw_buffer(ctx, fb, buf, "glNamedFramebufferDrawBuffer"); } @@ -298,13 +325,13 @@ _mesa_DrawBuffer(GLenum buffer) * names cannot specify more than one buffer. For example, * GL_FRONT_AND_BACK is illegal. */ -void GLAPIENTRY -_mesa_DrawBuffers(GLsizei n, const GLenum *buffers) +void +_mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, + GLsizei n, const GLenum *buffers, const char *caller) { GLuint output; GLbitfield usedBufferMask, supportedMask; GLbitfield destMask[MAX_DRAW_BUFFERS]; - GET_CURRENT_CONTEXT(ctx); FLUSH_VERTICES(ctx, 0); @@ -315,12 +342,18 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) * "An INVALID_VALUE error is generated if n is greater than * MAX_DRAW_BUFFERS." */ - if (n < 0 || n > (GLsizei) ctx->Const.MaxDrawBuffers) { - _mesa_error(ctx, GL_INVALID_VALUE, "glDrawBuffersARB(n)"); + if (n < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(n < 0)", caller); + return; + } + + if (n > (GLsizei) ctx->Const.MaxDrawBuffers) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(n > maximum number of draw buffers)", caller); return; } - supportedMask = supported_buffer_bitmask(ctx, ctx->DrawBuffer); + supportedMask = supported_buffer_bitmask(ctx, fb); usedBufferMask = 0x0; /* From the ES 3.0 specification, page 180: @@ -328,9 +361,9 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) * and the constant must be BACK or NONE." * (same restriction applies with GL_EXT_draw_buffers specification) */ - if (ctx->API == API_OPENGLES2 && _mesa_is_winsys_fbo(ctx->DrawBuffer) && + if (ctx->API == API_OPENGLES2 && _mesa_is_winsys_fbo(fb) && (n != 1 || (buffers[0] != GL_NONE && buffers[0] != GL_BACK))) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawBuffers(buffer)"); + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid buffers)", caller); return; } @@ -362,9 +395,11 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) * or equal to the value of MAX_COLOR_ATTACHMENTS, then the error * INVALID_OPERATION results." */ - if (_mesa_is_user_fbo(ctx->DrawBuffer) && buffers[output] >= + if (_mesa_is_user_fbo(fb) && buffers[output] >= GL_COLOR_ATTACHMENT0 + ctx->Const.MaxDrawBuffers) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawBuffersARB(buffer)"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(buffers[%d] >= maximum number of draw buffers)", + caller, output); return; } @@ -375,9 +410,10 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) * 4.5 or 4.6. Otherwise, an INVALID_ENUM error is generated. 
*/ if (destMask[output] == BAD_MASK) { - _mesa_error(ctx, GL_INVALID_ENUM, "glDrawBuffersARB(buffer)"); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", + caller, _mesa_lookup_enum_by_nr(buffers[output])); return; - } + } /* From the OpenGL 4.0 specification, page 256: * "For both the default framebuffer and framebuffer objects, the @@ -390,7 +426,8 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) * but the Khronos conformance tests expect INVALID_ENUM. */ if (_mesa_bitcount(destMask[output]) > 1) { - _mesa_error(ctx, GL_INVALID_ENUM, "glDrawBuffersARB(buffer)"); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", + caller, _mesa_lookup_enum_by_nr(buffers[output])); return; } @@ -407,7 +444,8 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) destMask[output] &= supportedMask; if (destMask[output] == 0) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glDrawBuffersARB(unsupported buffer)"); + "%s(unsupported buffer %s)", + caller, _mesa_lookup_enum_by_nr(buffers[output])); return; } @@ -416,10 +454,12 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) * in bufs must be COLOR_ATTACHMENTi or NONE. [...] INVALID_OPERATION." * (same restriction applies with GL_EXT_draw_buffers specification) */ - if (ctx->API == API_OPENGLES2 && _mesa_is_user_fbo(ctx->DrawBuffer) && + if (ctx->API == API_OPENGLES2 && _mesa_is_user_fbo(fb) && buffers[output] != GL_NONE && buffers[output] != GL_COLOR_ATTACHMENT0 + output) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawBuffers(buffer)"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(unsupported buffer %s)", + caller, _mesa_lookup_enum_by_nr(buffers[output])); return; } @@ -430,7 +470,8 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) */ if (destMask[output] & usedBufferMask) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glDrawBuffersARB(duplicated buffer)"); + "%s(duplicated buffer %s)", + caller, _mesa_lookup_enum_by_nr(buffers[output])); return; } @@ -440,17 +481,48 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) } /* OK, if we get here, there were no errors so set the new state */ - _mesa_drawbuffers(ctx, n, buffers, destMask); + _mesa_drawbuffers(ctx, fb, n, buffers, destMask); /* - * Call device driver function. Note that n can be equal to 0, + * Call device driver function if fb is the bound draw buffer. + * Note that n can be equal to 0, * in which case we don't want to reference buffers[0], which * may not be valid. */ - if (ctx->Driver.DrawBuffers) - ctx->Driver.DrawBuffers(ctx, n, buffers); - else if (ctx->Driver.DrawBuffer) - ctx->Driver.DrawBuffer(ctx, n > 0 ? buffers[0] : GL_NONE); + if (fb == ctx->DrawBuffer) { + if (ctx->Driver.DrawBuffers) + ctx->Driver.DrawBuffers(ctx, n, buffers); + else if (ctx->Driver.DrawBuffer) + ctx->Driver.DrawBuffer(ctx, n > 0 ? buffers[0] : GL_NONE); + } +} + + +void GLAPIENTRY +_mesa_DrawBuffers(GLsizei n, const GLenum *buffers) +{ + GET_CURRENT_CONTEXT(ctx); + _mesa_draw_buffers(ctx, ctx->DrawBuffer, n, buffers, "glDrawBuffers"); +} + + +void GLAPIENTRY +_mesa_NamedFramebufferDrawBuffers(GLuint framebuffer, GLsizei n, + const GLenum *bufs) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + + if (framebuffer) { + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glNamedFramebufferDrawBuffers"); + if (!fb) + return; + } + else + fb = ctx->WinSysDrawBuffer; + + _mesa_draw_buffers(ctx, fb, n, bufs, "glNamedFramebufferDrawBuffers"); } @@ -459,13 +531,11 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) * actual change. 
*/ static void -updated_drawbuffers(struct gl_context *ctx) +updated_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb) { FLUSH_VERTICES(ctx, _NEW_BUFFERS); if (ctx->API == API_OPENGL_COMPAT && !ctx->Extensions.ARB_ES2_compatibility) { - struct gl_framebuffer *fb = ctx->DrawBuffer; - /* Flag the FBO as requiring validation. */ if (_mesa_is_user_fbo(fb)) { fb->_Status = 0; @@ -482,6 +552,7 @@ updated_drawbuffers(struct gl_context *ctx) * so nothing should go wrong at this point. * * \param ctx current context + * \param fb the desired draw buffer * \param n number of color outputs to set * \param buffers array[n] of colorbuffer names, like GL_LEFT. * \param destMask array[n] of BUFFER_BIT_* bitmasks which correspond to the @@ -489,10 +560,9 @@ updated_drawbuffers(struct gl_context *ctx) * BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT). */ void -_mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, - const GLbitfield *destMask) +_mesa_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb, + GLuint n, const GLenum *buffers, const GLbitfield *destMask) { - struct gl_framebuffer *fb = ctx->DrawBuffer; GLbitfield mask[MAX_DRAW_BUFFERS]; GLuint buf; @@ -518,7 +588,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, while (destMask0) { GLint bufIndex = ffs(destMask0) - 1; if (fb->_ColorDrawBufferIndexes[count] != bufIndex) { - updated_drawbuffers(ctx); + updated_drawbuffers(ctx, fb); fb->_ColorDrawBufferIndexes[count] = bufIndex; } count++; @@ -535,14 +605,14 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, /* only one bit should be set in the destMask[buf] field */ assert(_mesa_bitcount(destMask[buf]) == 1); if (fb->_ColorDrawBufferIndexes[buf] != bufIndex) { - updated_drawbuffers(ctx); + updated_drawbuffers(ctx, fb); fb->_ColorDrawBufferIndexes[buf] = bufIndex; } count = buf + 1; } else { if (fb->_ColorDrawBufferIndexes[buf] != -1) { - updated_drawbuffers(ctx); + updated_drawbuffers(ctx, fb); fb->_ColorDrawBufferIndexes[buf] = -1; } } @@ -554,7 +624,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, /* set remaining outputs to -1 (GL_NONE) */ for (buf = fb->_NumColorDrawBuffers; buf < ctx->Const.MaxDrawBuffers; buf++) { if (fb->_ColorDrawBufferIndexes[buf] != -1) { - updated_drawbuffers(ctx); + updated_drawbuffers(ctx, fb); fb->_ColorDrawBufferIndexes[buf] = -1; } } @@ -566,7 +636,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, /* also set context drawbuffer state */ for (buf = 0; buf < ctx->Const.MaxDrawBuffers; buf++) { if (ctx->Color.DrawBuffer[buf] != fb->ColorDrawBuffer[buf]) { - updated_drawbuffers(ctx); + updated_drawbuffers(ctx, fb); ctx->Color.DrawBuffer[buf] = fb->ColorDrawBuffer[buf]; } } @@ -585,7 +655,7 @@ _mesa_update_draw_buffers(struct gl_context *ctx) /* should be a window system FBO */ assert(_mesa_is_winsys_fbo(ctx->DrawBuffer)); - _mesa_drawbuffers(ctx, ctx->Const.MaxDrawBuffers, + _mesa_drawbuffers(ctx, ctx->DrawBuffer, ctx->Const.MaxDrawBuffers, ctx->Color.DrawBuffer, NULL); } @@ -598,11 +668,10 @@ _mesa_update_draw_buffers(struct gl_context *ctx) * \param bufferIndex the numerical index corresponding to 'buffer' */ void -_mesa_readbuffer(struct gl_context *ctx, GLenum buffer, GLint bufferIndex) +_mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, GLint bufferIndex) { - struct gl_framebuffer *fb = ctx->ReadBuffer; - - if (_mesa_is_winsys_fbo(fb)) { + if ((fb == ctx->ReadBuffer) && 
_mesa_is_winsys_fbo(fb)) { /* Only update the per-context READ_BUFFER state if we're bound to * a window-system framebuffer. */ @@ -621,23 +690,17 @@ _mesa_readbuffer(struct gl_context *ctx, GLenum buffer, GLint bufferIndex) * Called by glReadBuffer to set the source renderbuffer for reading pixels. * \param mode color buffer such as GL_FRONT, GL_BACK, etc. */ -void GLAPIENTRY -_mesa_ReadBuffer(GLenum buffer) +void +_mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, const char *caller) { - struct gl_framebuffer *fb; GLbitfield supportedMask; GLint srcBuffer; - GET_CURRENT_CONTEXT(ctx); FLUSH_VERTICES(ctx, 0); if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glReadBuffer %s\n", _mesa_lookup_enum_by_nr(buffer)); - - fb = ctx->ReadBuffer; - - if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glReadBuffer %s\n", _mesa_lookup_enum_by_nr(buffer)); + _mesa_debug(ctx, "%s %s\n", caller, _mesa_lookup_enum_by_nr(buffer)); if (buffer == GL_NONE) { /* This is legal--it means that no buffer should be bound for reading. */ @@ -648,24 +711,53 @@ _mesa_ReadBuffer(GLenum buffer) srcBuffer = read_buffer_enum_to_index(buffer); if (srcBuffer == -1) { _mesa_error(ctx, GL_INVALID_ENUM, - "glReadBuffer(buffer=0x%x)", buffer); + "%s(invalid buffer %s)", caller, + _mesa_lookup_enum_by_nr(buffer)); return; } supportedMask = supported_buffer_bitmask(ctx, fb); if (((1 << srcBuffer) & supportedMask) == 0) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glReadBuffer(buffer=0x%x)", buffer); + "%s(invalid buffer %s)", caller, + _mesa_lookup_enum_by_nr(buffer)); return; } } /* OK, all error checking has been completed now */ - _mesa_readbuffer(ctx, buffer, srcBuffer); + _mesa_readbuffer(ctx, fb, buffer, srcBuffer); - /* - * Call device driver function. 
- */ - if (ctx->Driver.ReadBuffer) - (*ctx->Driver.ReadBuffer)(ctx, buffer); + /* Call the device driver function only if fb is the bound read buffer */ + if (fb == ctx->ReadBuffer) { + if (ctx->Driver.ReadBuffer) + (*ctx->Driver.ReadBuffer)(ctx, buffer); + } +} + + +void GLAPIENTRY +_mesa_ReadBuffer(GLenum buffer) +{ + GET_CURRENT_CONTEXT(ctx); + _mesa_read_buffer(ctx, ctx->ReadBuffer, buffer, "glReadBuffer"); +} + + +void GLAPIENTRY +_mesa_NamedFramebufferReadBuffer(GLuint framebuffer, GLenum src) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + + if (framebuffer) { + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glNamedFramebufferReadBuffer"); + if (!fb) + return; + } + else + fb = ctx->WinSysReadBuffer; + + _mesa_read_buffer(ctx, fb, src, "glNamedFramebufferReadBuffer"); } diff --git a/src/mesa/main/buffers.h b/src/mesa/main/buffers.h index ebcfa1c1e74..5aa79fda54b 100644 --- a/src/mesa/main/buffers.h +++ b/src/mesa/main/buffers.h @@ -36,26 +36,51 @@ #include "glheader.h" struct gl_context; +struct gl_framebuffer; + +extern void +_mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, const char *caller); extern void GLAPIENTRY _mesa_DrawBuffer( GLenum mode ); extern void GLAPIENTRY +_mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf); + +extern void +_mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, + GLsizei n, const GLenum *buffers, const char *caller); + +extern void GLAPIENTRY _mesa_DrawBuffers(GLsizei n, const GLenum *buffers); +extern void GLAPIENTRY +_mesa_NamedFramebufferDrawBuffers(GLuint framebuffer, GLsizei n, + const GLenum *bufs); + extern void -_mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, +_mesa_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb, + GLuint n, const GLenum *buffers, const GLbitfield *destMask); extern void -_mesa_readbuffer(struct gl_context *ctx, GLenum buffer, GLint bufferIndex); +_mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, GLint bufferIndex); extern void _mesa_update_draw_buffers(struct gl_context *ctx); +extern void +_mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, const char *caller); + extern void GLAPIENTRY _mesa_ReadBuffer( GLenum mode ); +extern void GLAPIENTRY +_mesa_NamedFramebufferReadBuffer(GLuint framebuffer, GLenum src); + #endif diff --git a/src/mesa/main/clear.c b/src/mesa/main/clear.c index 8d707bc34a1..426caea4709 100644 --- a/src/mesa/main/clear.c +++ b/src/mesa/main/clear.c @@ -34,6 +34,8 @@ #include "clear.h" #include "context.h" #include "enums.h" +#include "fbobject.h" +#include "get.h" #include "macros.h" #include "mtypes.h" #include "state.h" @@ -400,6 +402,24 @@ _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value) /** + * The ClearBuffer framework is so complicated and so riddled with the + * assumption that the framebuffer is bound that, for now, we will just fake + * direct state access clearing for the user. + */ +void GLAPIENTRY +_mesa_ClearNamedFramebufferiv(GLuint framebuffer, GLenum buffer, + GLint drawbuffer, const GLint *value) +{ + GLint oldfb; + + _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); + _mesa_ClearBufferiv(buffer, drawbuffer, value); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, (GLuint) oldfb); +} + + +/** * New in GL 3.0 * Clear unsigned integer color buffer (not depth, not stencil). 
*/ @@ -472,6 +492,24 @@ _mesa_ClearBufferuiv(GLenum buffer, GLint drawbuffer, const GLuint *value) /** + * The ClearBuffer framework is so complicated and so riddled with the + * assumption that the framebuffer is bound that, for now, we will just fake + * direct state access clearing for the user. + */ +void GLAPIENTRY +_mesa_ClearNamedFramebufferuiv(GLuint framebuffer, GLenum buffer, + GLint drawbuffer, const GLuint *value) +{ + GLint oldfb; + + _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); + _mesa_ClearBufferuiv(buffer, drawbuffer, value); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, (GLuint) oldfb); +} + + +/** * New in GL 3.0 * Clear fixed-pt or float color buffer or depth buffer (not stencil). */ @@ -565,6 +603,24 @@ _mesa_ClearBufferfv(GLenum buffer, GLint drawbuffer, const GLfloat *value) /** + * The ClearBuffer framework is so complicated and so riddled with the + * assumption that the framebuffer is bound that, for now, we will just fake + * direct state access clearing for the user. + */ +void GLAPIENTRY +_mesa_ClearNamedFramebufferfv(GLuint framebuffer, GLenum buffer, + GLint drawbuffer, const GLfloat *value) +{ + GLint oldfb; + + _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); + _mesa_ClearBufferfv(buffer, drawbuffer, value); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, (GLuint) oldfb); +} + + +/** * New in GL 3.0 * Clear depth/stencil buffer only. */ @@ -626,3 +682,21 @@ _mesa_ClearBufferfi(GLenum buffer, GLint drawbuffer, ctx->Stencil.Clear = clearStencilSave; } } + + +/** + * The ClearBuffer framework is so complicated and so riddled with the + * assumption that the framebuffer is bound that, for now, we will just fake + * direct state access clearing for the user. 
+ */ +void GLAPIENTRY +_mesa_ClearNamedFramebufferfi(GLuint framebuffer, GLenum buffer, + GLfloat depth, GLint stencil) +{ + GLint oldfb; + + _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldfb); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); + _mesa_ClearBufferfi(buffer, 0, depth, stencil); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, (GLuint) oldfb); +} diff --git a/src/mesa/main/clear.h b/src/mesa/main/clear.h index 96ce47b929e..c29850676ca 100644 --- a/src/mesa/main/clear.h +++ b/src/mesa/main/clear.h @@ -52,13 +52,29 @@ extern void GLAPIENTRY _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value); extern void GLAPIENTRY +_mesa_ClearNamedFramebufferiv(GLuint framebuffer, GLenum buffer, + GLint drawbuffer, const GLint *value); + +extern void GLAPIENTRY _mesa_ClearBufferuiv(GLenum buffer, GLint drawbuffer, const GLuint *value); extern void GLAPIENTRY +_mesa_ClearNamedFramebufferuiv(GLuint framebuffer, GLenum buffer, + GLint drawbuffer, const GLuint *value); + +extern void GLAPIENTRY _mesa_ClearBufferfv(GLenum buffer, GLint drawbuffer, const GLfloat *value); extern void GLAPIENTRY +_mesa_ClearNamedFramebufferfv(GLuint framebuffer, GLenum buffer, + GLint drawbuffer, const GLfloat *value); + +extern void GLAPIENTRY _mesa_ClearBufferfi(GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil); +extern void GLAPIENTRY +_mesa_ClearNamedFramebufferfi(GLuint framebuffer, GLenum buffer, + GLfloat depth, GLint stencil); + #endif diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h index 5a66a4eec90..9c3baf4c6aa 100644 --- a/src/mesa/main/config.h +++ b/src/mesa/main/config.h @@ -213,19 +213,10 @@ /** For GL_ARB_fragment_program */ /*@{*/ #define MAX_FRAGMENT_PROGRAM_ADDRESS_REGS 0 +#define MAX_FRAGMENT_PROGRAM_PARAMS 64 +#define MAX_FRAGMENT_PROGRAM_INPUTS 12 /*@}*/ -/** For GL_NV_fragment_program */ -/*@{*/ -#define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */ -#define MAX_NV_FRAGMENT_PROGRAM_TEMPS 96 -#define MAX_NV_FRAGMENT_PROGRAM_PARAMS 64 -#define MAX_NV_FRAGMENT_PROGRAM_INPUTS 12 -#define MAX_NV_FRAGMENT_PROGRAM_OUTPUTS 3 -#define MAX_NV_FRAGMENT_PROGRAM_WRITE_ONLYS 2 -/*@}*/ - - /** For GL_ARB_vertex_shader */ /*@{*/ #define MAX_VERTEX_GENERIC_ATTRIBS 16 diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 0a192de8c0a..79fa01849e0 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -489,8 +489,8 @@ init_program_limits(struct gl_constants *consts, gl_shader_stage stage, prog->MaxOutputComponents = 16 * 4; /* old limit not to break tnl and swrast */ break; case MESA_SHADER_FRAGMENT: - prog->MaxParameters = MAX_NV_FRAGMENT_PROGRAM_PARAMS; - prog->MaxAttribs = MAX_NV_FRAGMENT_PROGRAM_INPUTS; + prog->MaxParameters = MAX_FRAGMENT_PROGRAM_PARAMS; + prog->MaxAttribs = MAX_FRAGMENT_PROGRAM_INPUTS; prog->MaxAddressRegs = MAX_FRAGMENT_PROGRAM_ADDRESS_REGS; prog->MaxUniformComponents = 4 * MAX_UNIFORMS; prog->MaxInputComponents = 16 * 4; /* old limit not to break tnl and swrast */ @@ -883,6 +883,19 @@ update_default_objects(struct gl_context *ctx) } +/* XXX this is temporary and should be removed at some point in the + * future when there's a reasonable expectation that the libGL library + * contains the _glapi_new_nop_table() and _glapi_set_nop_handler() + * functions which were added in Mesa 10.6. 
+ */ +#if !defined(_WIN32) +/* Avoid libGL / driver ABI break */ +#define USE_GLAPI_NOP_FEATURES 0 +#else +#define USE_GLAPI_NOP_FEATURES 1 +#endif + + /** * This function is called by the glapi no-op functions. For each OpenGL * function/entrypoint there's a simple no-op function. These "no-op" @@ -898,6 +911,7 @@ update_default_objects(struct gl_context *ctx) * * \param name the name of the OpenGL function */ +#if USE_GLAPI_NOP_FEATURES static void nop_handler(const char *name) { @@ -914,6 +928,7 @@ nop_handler(const char *name) } #endif } +#endif /** @@ -923,12 +938,52 @@ nop_handler(const char *name) static void GLAPIENTRY nop_glFlush(void) { - /* don't record an error like we do in _mesa_generic_nop() */ + /* don't record an error like we do in nop_handler() */ +} +#endif + + +#if !USE_GLAPI_NOP_FEATURES +static int +generic_nop(void) +{ + GET_CURRENT_CONTEXT(ctx); + _mesa_error(ctx, GL_INVALID_OPERATION, + "unsupported function called " + "(unsupported extension or deprecated function?)"); + return 0; } #endif /** + * Create a new API dispatch table in which all entries point to the + * generic_nop() function. This will not work on Windows because of + * the __stdcall convention which requires the callee to clean up the + * call stack. That's impossible with one generic no-op function. + */ +struct _glapi_table * +_mesa_new_nop_table(unsigned numEntries) +{ + struct _glapi_table *table; + +#if !USE_GLAPI_NOP_FEATURES + table = malloc(numEntries * sizeof(_glapi_proc)); + if (table) { + _glapi_proc *entry = (_glapi_proc *) table; + unsigned i; + for (i = 0; i < numEntries; i++) { + entry[i] = (_glapi_proc) generic_nop; + } + } +#else + table = _glapi_new_nop_table(numEntries); +#endif + return table; +} + + +/** * Allocate and initialize a new dispatch table. The table will be * populated with pointers to "no-op" functions. In turn, the no-op * functions will call nop_handler() above. @@ -941,8 +996,9 @@ alloc_dispatch_table(void) * Mesa we do this to accommodate different versions of libGL and various * DRI drivers. 
    */
-   GLint numEntries = MAX2(_glapi_get_dispatch_table_size(), _gloffset_COUNT);
-   struct _glapi_table *table = _glapi_new_nop_table(numEntries);
+   int numEntries = MAX2(_glapi_get_dispatch_table_size(), _gloffset_COUNT);
+
+   struct _glapi_table *table = _mesa_new_nop_table(numEntries);
 #if defined(_WIN32)
    if (table) {
@@ -966,7 +1022,9 @@ alloc_dispatch_table(void)
    }
 #endif
+#if USE_GLAPI_NOP_FEATURES
    _glapi_set_nop_handler(nop_handler);
+#endif
    return table;
 }
@@ -1111,9 +1169,7 @@ _mesa_initialize_context(struct gl_context *ctx,
       ctx->HasConfig = GL_FALSE;
    }
-   if (_mesa_is_desktop_gl(ctx)) {
-      _mesa_override_gl_version(ctx);
-   }
+   _mesa_override_gl_version(ctx);
    /* misc one-time initializations */
    one_time_init(ctx);
@@ -1275,7 +1331,6 @@ _mesa_free_context_data( struct gl_context *ctx )
    _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL);
    _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, NULL);
-   _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, NULL);
    _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL);
    _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL);
@@ -1565,7 +1620,8 @@ handle_first_current(struct gl_context *ctx)
    else
       buffer = GL_FRONT;
-      _mesa_drawbuffers(ctx, 1, &buffer, NULL /* destMask */);
+      _mesa_drawbuffers(ctx, ctx->DrawBuffer, 1, &buffer,
+                        NULL /* destMask */);
    }
    if (ctx->ReadBuffer != _mesa_get_incomplete_framebuffer()) {
@@ -1578,7 +1634,7 @@ handle_first_current(struct gl_context *ctx)
         bufferIndex = BUFFER_FRONT_LEFT;
      }
-      _mesa_readbuffer(ctx, buffer, bufferIndex);
+      _mesa_readbuffer(ctx, ctx->ReadBuffer, buffer, bufferIndex);
    }
 }
diff --git a/src/mesa/main/copyimage.c b/src/mesa/main/copyimage.c
index fd22f28892c..e8732c6175b 100644
--- a/src/mesa/main/copyimage.c
+++ b/src/mesa/main/copyimage.c
@@ -40,14 +40,25 @@ enum mesa_block_class {
    BLOCK_CLASS_64_BITS
 };
+/**
+ * Prepare the source or destination resource, including:
+ * - Error checking
+ * - Creating texture wrappers for renderbuffers
+ * \param name the texture or renderbuffer name
+ * \param target GL_TEXTURE target or GL_RENDERBUFFER. For the latter, it will
+ * be changed to a compatible GL_TEXTURE target.
+ * \param level mipmap level
+ * \param tex_obj returns a pointer to a texture object
+ * \param tex_image returns a pointer to a texture image
+ * \param tmp_tex returns temporary texture object name
+ * \return true if success, false if error
+ */
 static bool
 prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
                struct gl_texture_object **tex_obj,
                struct gl_texture_image **tex_image, GLuint *tmp_tex,
                const char *dbg_prefix)
 {
-   struct gl_renderbuffer *rb;
-
    if (name == 0) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glCopyImageSubData(%sName = %d)", dbg_prefix, name);
@@ -87,7 +98,7 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
    }
    if (*target == GL_RENDERBUFFER) {
-      rb = _mesa_lookup_renderbuffer(ctx, name);
+      struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(ctx, name);
       if (!rb) {
          _mesa_error(ctx, GL_INVALID_VALUE,
                      "glCopyImageSubData(%sName = %u)", dbg_prefix, name);
@@ -169,8 +180,15 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
    return true;
 }
+
+/**
+ * Check that the x,y,z,width,height,region is within the texture image
+ * dimensions.
+ * \return true if bounds OK, false if regions is out of bounds + */ static bool -check_region_bounds(struct gl_context *ctx, struct gl_texture_image *tex_image, +check_region_bounds(struct gl_context *ctx, + const struct gl_texture_image *tex_image, int x, int y, int z, int width, int height, int depth, const char *dbg_prefix) { @@ -188,6 +206,7 @@ check_region_bounds(struct gl_context *ctx, struct gl_texture_image *tex_image, return false; } + /* Check X direction */ if (x + width > tex_image->Width) { _mesa_error(ctx, GL_INVALID_VALUE, "glCopyImageSubData(%sX or %sWidth exceeds image bounds)", @@ -195,6 +214,7 @@ check_region_bounds(struct gl_context *ctx, struct gl_texture_image *tex_image, return false; } + /* Check Y direction */ switch (tex_image->TexObject->Target) { case GL_TEXTURE_1D: case GL_TEXTURE_1D_ARRAY: @@ -215,6 +235,7 @@ check_region_bounds(struct gl_context *ctx, struct gl_texture_image *tex_image, break; } + /* Check Z direction */ switch (tex_image->TexObject->Target) { case GL_TEXTURE_1D: case GL_TEXTURE_2D: @@ -260,7 +281,7 @@ check_region_bounds(struct gl_context *ctx, struct gl_texture_image *tex_image, } static bool -compressed_format_compatible(struct gl_context *ctx, +compressed_format_compatible(const struct gl_context *ctx, GLenum compressedFormat, GLenum otherFormat) { enum mesa_block_class compressedClass, otherClass; @@ -348,8 +369,8 @@ compressed_format_compatible(struct gl_context *ctx, } static bool -copy_format_compatible(struct gl_context *ctx, - GLenum srcFormat, GLenum dstFormat) +copy_format_compatible(const struct gl_context *ctx, + GLenum srcFormat, GLenum dstFormat) { /* * From ARB_copy_image spec: @@ -389,7 +410,7 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel, struct gl_texture_object *srcTexObj, *dstTexObj; struct gl_texture_image *srcTexImage, *dstTexImage; GLuint src_bw, src_bh, dst_bw, dst_bh; - int i, srcNewZ, dstNewZ; + int i; if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glCopyImageSubData(%u, %s, %d, %d, %d, %d, " @@ -447,6 +468,8 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel, } for (i = 0; i < srcDepth; ++i) { + int srcNewZ, dstNewZ; + if (srcTexObj->Target == GL_TEXTURE_CUBE_MAP) { srcTexImage = srcTexObj->Image[i + srcZ][srcLevel]; srcNewZ = 0; diff --git a/src/mesa/main/depth.c b/src/mesa/main/depth.c index 29851ecb8a4..bb4591cf152 100644 --- a/src/mesa/main/depth.c +++ b/src/mesa/main/depth.c @@ -65,6 +65,9 @@ _mesa_DepthFunc( GLenum func ) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glDepthFunc %s\n", _mesa_lookup_enum_by_nr(func)); + if (ctx->Depth.Func == func) + return; + switch (func) { case GL_LESS: /* (default) pass if incoming z < stored z */ case GL_GEQUAL: @@ -80,9 +83,6 @@ _mesa_DepthFunc( GLenum func ) return; } - if (ctx->Depth.Func == func) - return; - FLUSH_VERTICES(ctx, _NEW_DEPTH); ctx->Depth.Func = func; diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index 431c4b48b79..aafe486fb60 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -7592,28 +7592,6 @@ save_FramebufferTexture(GLenum target, GLenum attachment, } } -static void GLAPIENTRY -save_FramebufferTextureFace(GLenum target, GLenum attachment, - GLuint texture, GLint level, GLenum face) -{ - Node *n; - GET_CURRENT_CONTEXT(ctx); - ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx); - n = alloc_instruction(ctx, OPCODE_FRAMEBUFFER_TEXTURE_FACE, 5); - if (n) { - n[1].e = target; - n[2].e = attachment; - n[3].ui = texture; - n[4].i = level; - n[5].e = face; - } - if 
(ctx->ExecuteFlag) { - CALL_FramebufferTextureFaceARB(ctx->Exec, (target, attachment, texture, - level, face)); - } -} - - static void GLAPIENTRY save_WaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout) @@ -8873,11 +8851,6 @@ execute_list(struct gl_context *ctx, GLuint list) CALL_FramebufferTexture(ctx->Exec, (n[1].e, n[2].e, n[3].ui, n[4].i)); break; - case OPCODE_FRAMEBUFFER_TEXTURE_FACE: - CALL_FramebufferTextureFaceARB(ctx->Exec, (n[1].e, n[2].e, - n[3].ui, n[4].i, n[5].e)); - break; - /* GL_ARB_sync */ case OPCODE_WAIT_SYNC: { @@ -9644,10 +9617,9 @@ _mesa_initialize_save_table(const struct gl_context *ctx) SET_BlendEquationiARB(table, save_BlendEquationi); SET_BlendEquationSeparateiARB(table, save_BlendEquationSeparatei); - /* GL_ARB_geometry_shader4 */ + /* OpenGL 3.2 */ SET_ProgramParameteri(table, save_ProgramParameteri); SET_FramebufferTexture(table, save_FramebufferTexture); - SET_FramebufferTextureFaceARB(table, save_FramebufferTextureFace); /* GL_NV_conditional_render */ SET_BeginConditionalRender(table, save_BeginConditionalRender); diff --git a/src/mesa/main/errors.c b/src/mesa/main/errors.c index 2aa1deb635f..b3406665d94 100644 --- a/src/mesa/main/errors.c +++ b/src/mesa/main/errors.c @@ -39,6 +39,7 @@ #include "mtypes.h" #include "version.h" #include "util/hash_table.h" +#include "util/simple_list.h" static mtx_t DynamicIDMutex = _MTX_INITIALIZER_NP; static GLuint NextDynamicID = 1; @@ -1412,6 +1413,26 @@ should_output(struct gl_context *ctx, GLenum error, const char *fmtString) void +_mesa_gl_vdebug(struct gl_context *ctx, + GLuint *id, + enum mesa_debug_source source, + enum mesa_debug_type type, + enum mesa_debug_severity severity, + const char *fmtString, + va_list args) +{ + char s[MAX_DEBUG_MESSAGE_LENGTH]; + int len; + + debug_get_id(id); + + len = _mesa_vsnprintf(s, MAX_DEBUG_MESSAGE_LENGTH, fmtString, args); + + log_msg(ctx, source, type, *id, severity, len, s); +} + + +void _mesa_gl_debug(struct gl_context *ctx, GLuint *id, enum mesa_debug_source source, @@ -1419,17 +1440,10 @@ _mesa_gl_debug(struct gl_context *ctx, enum mesa_debug_severity severity, const char *fmtString, ...) 
{ - char s[MAX_DEBUG_MESSAGE_LENGTH]; - int len; va_list args; - - debug_get_id(id); - va_start(args, fmtString); - len = _mesa_vsnprintf(s, MAX_DEBUG_MESSAGE_LENGTH, fmtString, args); + _mesa_gl_vdebug(ctx, id, source, type, severity, fmtString, args); va_end(args); - - log_msg(ctx, source, type, *id, severity, len, s); } diff --git a/src/mesa/main/errors.h b/src/mesa/main/errors.h index e6dc9b5f1b9..24f234f7f10 100644 --- a/src/mesa/main/errors.h +++ b/src/mesa/main/errors.h @@ -76,6 +76,15 @@ extern FILE * _mesa_get_log_file(void); extern void +_mesa_gl_vdebug(struct gl_context *ctx, + GLuint *id, + enum mesa_debug_source source, + enum mesa_debug_type type, + enum mesa_debug_severity severity, + const char *fmtString, + va_list args); + +extern void _mesa_gl_debug(struct gl_context *ctx, GLuint *id, enum mesa_debug_source source, diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index f7ce0642aef..4176a69ed7c 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -104,7 +104,7 @@ static const struct extension extension_table[] = { { "GL_ARB_depth_clamp", o(ARB_depth_clamp), GL, 2003 }, { "GL_ARB_depth_texture", o(ARB_depth_texture), GLL, 2001 }, { "GL_ARB_derivative_control", o(ARB_derivative_control), GL, 2014 }, - { "GL_ARB_direct_state_access", o(dummy_false), GL, 2014 }, + { "GL_ARB_direct_state_access", o(dummy_true), GLC, 2014 }, { "GL_ARB_draw_buffers", o(dummy_true), GL, 2002 }, { "GL_ARB_draw_buffers_blend", o(ARB_draw_buffers_blend), GL, 2009 }, { "GL_ARB_draw_elements_base_vertex", o(ARB_draw_elements_base_vertex), GL, 2009 }, @@ -117,6 +117,7 @@ static const struct extension extension_table[] = { { "GL_ARB_fragment_program", o(ARB_fragment_program), GLL, 2002 }, { "GL_ARB_fragment_program_shadow", o(ARB_fragment_program_shadow), GLL, 2003 }, { "GL_ARB_fragment_shader", o(ARB_fragment_shader), GL, 2002 }, + { "GL_ARB_framebuffer_no_attachments", o(ARB_framebuffer_no_attachments), GL, 2012 }, { "GL_ARB_framebuffer_object", o(ARB_framebuffer_object), GL, 2005 }, { "GL_ARB_framebuffer_sRGB", o(EXT_framebuffer_sRGB), GL, 1998 }, { "GL_ARB_get_program_binary", o(dummy_true), GL, 2010 }, diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 27cf97f1778..f8dcf122d99 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -121,6 +121,27 @@ _mesa_lookup_renderbuffer(struct gl_context *ctx, GLuint id) /** + * A convenience function for direct state access that throws + * GL_INVALID_OPERATION if the renderbuffer doesn't exist. + */ +struct gl_renderbuffer * +_mesa_lookup_renderbuffer_err(struct gl_context *ctx, GLuint id, + const char *func) +{ + struct gl_renderbuffer *rb; + + rb = _mesa_lookup_renderbuffer(ctx, id); + if (!rb || rb == &DummyRenderbuffer) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(non-existent renderbuffer %u)", func, id); + return NULL; + } + + return rb; +} + + +/** * Helper routine for getting a gl_framebuffer. */ struct gl_framebuffer * @@ -138,6 +159,27 @@ _mesa_lookup_framebuffer(struct gl_context *ctx, GLuint id) /** + * A convenience function for direct state access that throws + * GL_INVALID_OPERATION if the framebuffer doesn't exist. 
+ */
+struct gl_framebuffer *
+_mesa_lookup_framebuffer_err(struct gl_context *ctx, GLuint id,
+                             const char *func)
+{
+   struct gl_framebuffer *fb;
+
+   fb = _mesa_lookup_framebuffer(ctx, id);
+   if (!fb || fb == &DummyFramebuffer) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(non-existent framebuffer %u)", func, id);
+      return NULL;
+   }
+
+   return fb;
+}
+
+
+/**
 * Mark the given framebuffer as invalid. This will force the
 * test for framebuffer completeness to be done before the framebuffer
 * is used.
@@ -423,7 +465,7 @@ set_texture_attachment(struct gl_context *ctx,
                        struct gl_framebuffer *fb,
                        struct gl_renderbuffer_attachment *att,
                        struct gl_texture_object *texObj,
-                       GLenum texTarget, GLuint level, GLuint zoffset,
+                       GLenum texTarget, GLuint level, GLuint layer,
                        GLboolean layered)
 {
    struct gl_renderbuffer *rb = att->Renderbuffer;
@@ -447,7 +489,7 @@ set_texture_attachment(struct gl_context *ctx,
    /* always update these fields */
    att->TextureLevel = level;
    att->CubeMapFace = _mesa_tex_target_to_face(texTarget);
-   att->Zoffset = zoffset;
+   att->Zoffset = layer;
    att->Layered = layered;
    att->Complete = GL_FALSE;
@@ -479,9 +521,10 @@ set_renderbuffer_attachment(struct gl_context *ctx,
 * Attach a renderbuffer object to a framebuffer object.
 */
 void
-_mesa_framebuffer_renderbuffer(struct gl_context *ctx,
-                               struct gl_framebuffer *fb,
-                               GLenum attachment, struct gl_renderbuffer *rb)
+_mesa_FramebufferRenderbuffer_sw(struct gl_context *ctx,
+                                 struct gl_framebuffer *fb,
+                                 GLenum attachment,
+                                 struct gl_renderbuffer *rb)
 {
    struct gl_renderbuffer_attachment *att;
@@ -914,6 +957,7 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx,
    fb->Height = 0;
    fb->_AllColorBuffersFixedPoint = GL_TRUE;
    fb->_HasSNormOrFloatColorBuffer = GL_FALSE;
+   fb->_HasAttachments = true;
    /* Start at -2 to more easily loop over all attachment points.
    *  -2: depth buffer
@@ -1112,14 +1156,48 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx,
      }
      else if (att_layer_count > max_layer_count) {
         max_layer_count = att_layer_count;
      }
+
+      /*
+       * The extension GL_ARB_framebuffer_no_attachments places additional
+       * requirements on each attachment. Those additional requirements are
+       * tighter than those of previous versions of GL. In the interest of
+       * better compatibility, we will not enforce these restrictions. For
+       * the record, those additional restrictions are quoted below:
+       *
+       * "The width and height of image are greater than zero and less than or
+       * equal to the values of the implementation-dependent limits
+       * MAX_FRAMEBUFFER_WIDTH and MAX_FRAMEBUFFER_HEIGHT, respectively."
+       *
+       * "If <image> is a three-dimensional texture or a one- or two-dimensional
+       * array texture and the attachment is layered, the depth or layer count
+       * of the texture is less than or equal to the implementation-dependent
+       * limit MAX_FRAMEBUFFER_LAYERS."
+       *
+       * "If image has multiple samples, its sample count is less than or equal
+       * to the value of the implementation-dependent limit
+       * MAX_FRAMEBUFFER_SAMPLES."
+ * + * The same requirements are also in place for GL 4.5, + * Section 9.4.1 "Framebuffer Attachment Completeness", pg 310-311 + */ } fb->MaxNumLayers = max_layer_count; if (numImages == 0) { - fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT; - fbo_incomplete(ctx, "no attachments", -1); - return; + fb->_HasAttachments = false; + + if (!ctx->Extensions.ARB_framebuffer_no_attachments) { + fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT; + fbo_incomplete(ctx, "no attachments", -1); + return; + } + + if (fb->DefaultGeometry.Width == 0 || fb->DefaultGeometry.Height == 0) { + fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT; + fbo_incomplete(ctx, "no attachments and default width or height is 0", -1); + return; + } } if (_mesa_is_desktop_gl(ctx) && !ctx->Extensions.ARB_ES2_compatibility) { @@ -1184,8 +1262,10 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx, * renderbuffers/textures are different sizes, the framebuffer * width/height will be set to the smallest width/height. */ - fb->Width = minWidth; - fb->Height = minHeight; + if (numImages != 0) { + fb->Width = minWidth; + fb->Height = minHeight; + } /* finally, update the visual info for the framebuffer */ _mesa_update_framebuffer_visual(ctx, fb); @@ -1291,6 +1371,131 @@ _mesa_BindRenderbufferEXT(GLenum target, GLuint renderbuffer) bind_renderbuffer(target, renderbuffer, true); } +static void +framebuffer_parameteri(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum pname, GLint param, const char *func) +{ + switch (pname) { + case GL_FRAMEBUFFER_DEFAULT_WIDTH: + if (param < 0 || param > ctx->Const.MaxFramebufferWidth) + _mesa_error(ctx, GL_INVALID_VALUE, "%s", func); + else + fb->DefaultGeometry.Width = param; + break; + case GL_FRAMEBUFFER_DEFAULT_HEIGHT: + if (param < 0 || param > ctx->Const.MaxFramebufferHeight) + _mesa_error(ctx, GL_INVALID_VALUE, "%s", func); + else + fb->DefaultGeometry.Height = param; + break; + case GL_FRAMEBUFFER_DEFAULT_LAYERS: + if (param < 0 || param > ctx->Const.MaxFramebufferLayers) + _mesa_error(ctx, GL_INVALID_VALUE, "%s", func); + else + fb->DefaultGeometry.Layers = param; + break; + case GL_FRAMEBUFFER_DEFAULT_SAMPLES: + if (param < 0 || param > ctx->Const.MaxFramebufferSamples) + _mesa_error(ctx, GL_INVALID_VALUE, "%s", func); + else + fb->DefaultGeometry.NumSamples = param; + break; + case GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS: + fb->DefaultGeometry.FixedSampleLocations = param; + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, + "%s(pname=0x%x)", func, pname); + } +} + +void GLAPIENTRY +_mesa_FramebufferParameteri(GLenum target, GLenum pname, GLint param) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + + if (!ctx->Extensions.ARB_framebuffer_no_attachments) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glFramebufferParameteriv not supported " + "(ARB_framebuffer_no_attachments not implemented)"); + return; + } + + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glFramebufferParameteri(target=0x%x)", target); + return; + } + + /* check framebuffer binding */ + if (_mesa_is_winsys_fbo(fb)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glFramebufferParameteri"); + return; + } + + framebuffer_parameteri(ctx, fb, pname, param, "glFramebufferParameteri"); +} + +static void +get_framebuffer_parameteriv(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum pname, GLint *params, const char *func) +{ + switch (pname) { + case GL_FRAMEBUFFER_DEFAULT_WIDTH: + *params = 
fb->DefaultGeometry.Width; + break; + case GL_FRAMEBUFFER_DEFAULT_HEIGHT: + *params = fb->DefaultGeometry.Height; + break; + case GL_FRAMEBUFFER_DEFAULT_LAYERS: + *params = fb->DefaultGeometry.Layers; + break; + case GL_FRAMEBUFFER_DEFAULT_SAMPLES: + *params = fb->DefaultGeometry.NumSamples; + break; + case GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS: + *params = fb->DefaultGeometry.FixedSampleLocations; + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, + "%s(pname=0x%x)", func, pname); + } +} + +void GLAPIENTRY +_mesa_GetFramebufferParameteriv(GLenum target, GLenum pname, GLint *params) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + + if (!ctx->Extensions.ARB_framebuffer_no_attachments) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetFramebufferParameteriv not supported " + "(ARB_framebuffer_no_attachments not implemented)"); + return; + } + + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glGetFramebufferParameteriv(target=0x%x)", target); + return; + } + + /* check framebuffer binding */ + if (_mesa_is_winsys_fbo(fb)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetFramebufferParameteriv"); + return; + } + + get_framebuffer_parameteriv(ctx, fb, pname, params, + "glGetFramebufferParameteriv"); +} + /** * Remove the specified renderbuffer or texture from any attachment point in @@ -2396,15 +2601,23 @@ _mesa_DeleteFramebuffers(GLsizei n, const GLuint *framebuffers) } -void GLAPIENTRY -_mesa_GenFramebuffers(GLsizei n, GLuint *framebuffers) +/** + * This is the implementation for glGenFramebuffers and glCreateFramebuffers. + * It is not exposed to the rest of Mesa to encourage the use of + * nameless buffers in driver internals. + */ +static void +create_framebuffers(GLsizei n, GLuint *framebuffers, bool dsa) { GET_CURRENT_CONTEXT(ctx); GLuint first; GLint i; + struct gl_framebuffer *fb; + + const char *func = dsa ? 
"glCreateFramebuffers" : "glGenFramebuffers"; if (n < 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glGenFramebuffersEXT(n)"); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(n < 0)", func); return; } @@ -2416,31 +2629,43 @@ _mesa_GenFramebuffers(GLsizei n, GLuint *framebuffers) for (i = 0; i < n; i++) { GLuint name = first + i; framebuffers[i] = name; - /* insert dummy placeholder into hash table */ + + if (dsa) { + fb = ctx->Driver.NewFramebuffer(ctx, framebuffers[i]); + if (!fb) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func); + return; + } + } + else + fb = &DummyFramebuffer; + mtx_lock(&ctx->Shared->Mutex); - _mesa_HashInsert(ctx->Shared->FrameBuffers, name, &DummyFramebuffer); + _mesa_HashInsert(ctx->Shared->FrameBuffers, name, fb); mtx_unlock(&ctx->Shared->Mutex); } } -GLenum GLAPIENTRY -_mesa_CheckFramebufferStatus(GLenum target) +void GLAPIENTRY +_mesa_GenFramebuffers(GLsizei n, GLuint *framebuffers) { - struct gl_framebuffer *buffer; - GET_CURRENT_CONTEXT(ctx); + create_framebuffers(n, framebuffers, false); +} - ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0); - if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glCheckFramebufferStatus(%s)\n", - _mesa_lookup_enum_by_nr(target)); +void GLAPIENTRY +_mesa_CreateFramebuffers(GLsizei n, GLuint *framebuffers) +{ + create_framebuffers(n, framebuffers, true); +} - buffer = get_framebuffer_target(ctx, target); - if (!buffer) { - _mesa_error(ctx, GL_INVALID_ENUM, "glCheckFramebufferStatus(target)"); - return 0; - } + +GLenum +_mesa_check_framebuffer_status(struct gl_context *ctx, + struct gl_framebuffer *buffer) +{ + ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0); if (_mesa_is_winsys_fbo(buffer)) { /* EGL_KHR_surfaceless_context allows the winsys FBO to be incomplete. */ @@ -2461,6 +2686,67 @@ _mesa_CheckFramebufferStatus(GLenum target) } +GLenum GLAPIENTRY +_mesa_CheckFramebufferStatus(GLenum target) +{ + struct gl_framebuffer *fb; + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glCheckFramebufferStatus(%s)\n", + _mesa_lookup_enum_by_nr(target)); + + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glCheckFramebufferStatus(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return 0; + } + + return _mesa_check_framebuffer_status(ctx, fb); +} + + +GLenum GLAPIENTRY +_mesa_CheckNamedFramebufferStatus(GLuint framebuffer, GLenum target) +{ + struct gl_framebuffer *fb; + GET_CURRENT_CONTEXT(ctx); + + /* Validate the target (for conformance's sake) and grab a reference to the + * default framebuffer in case framebuffer = 0. + * Section 9.4 Framebuffer Completeness of the OpenGL 4.5 core spec + * (30.10.2014, PDF page 336) says: + * "If framebuffer is zero, then the status of the default read or + * draw framebuffer (as determined by target) is returned." + */ + switch (target) { + case GL_DRAW_FRAMEBUFFER: + case GL_FRAMEBUFFER: + fb = ctx->WinSysDrawBuffer; + break; + case GL_READ_FRAMEBUFFER: + fb = ctx->WinSysReadBuffer; + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, + "glCheckNamedFramebufferStatus(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return 0; + } + + if (framebuffer) { + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glCheckNamedFramebufferStatus"); + if (!fb) + return 0; + } + + return _mesa_check_framebuffer_status(ctx, fb); +} + + /** * Replicate the src attachment point. 
Used by framebuffer_texture() when * the same texture is attached at GL_DEPTH_ATTACHMENT and @@ -2487,144 +2773,308 @@ reuse_framebuffer_texture_attachment(struct gl_framebuffer *fb, /** - * Common code called by glFramebufferTexture1D/2D/3D() and - * glFramebufferTextureLayer(). + * Common code called by gl*FramebufferTexture*() to retrieve the correct + * texture object pointer. * - * \param textarget is the textarget that was passed to the - * glFramebufferTexture...() function, or 0 if the corresponding function - * doesn't have a textarget parameter. + * \param texObj where the pointer to the texture object is returned. Note + * that a successful call may return texObj = NULL. * - * \param layered is true if this function was called from - * glFramebufferTexture(), false otherwise. + * \return true if no errors, false if errors */ -static void -framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, - GLenum attachment, GLenum textarget, GLuint texture, - GLint level, GLuint zoffset, GLboolean layered) +static bool +get_texture_for_framebuffer(struct gl_context *ctx, GLuint texture, + bool layered, const char *caller, + struct gl_texture_object **texObj) { - struct gl_renderbuffer_attachment *att; - struct gl_texture_object *texObj = NULL; - struct gl_framebuffer *fb; - GLenum maxLevelsTarget; + *texObj = NULL; /* This will get returned if texture = 0. */ - fb = get_framebuffer_target(ctx, target); - if (!fb) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glFramebufferTexture%s(target=0x%x)", caller, target); - return; + if (!texture) + return true; + + *texObj = _mesa_lookup_texture(ctx, texture); + if (*texObj == NULL || (*texObj)->Target == 0) { + /* Can't render to a non-existent texture object. + * + * The OpenGL 4.5 core spec (02.02.2015) in Section 9.2 Binding and + * Managing Framebuffer Objects specifies a different error + * depending upon the calling function (PDF pages 325-328). + * *FramebufferTexture (where layered = GL_TRUE) throws invalid + * value, while the other commands throw invalid operation (where + * layered = GL_FALSE). + */ + const GLenum error = layered ? GL_INVALID_VALUE : + GL_INVALID_OPERATION; + _mesa_error(ctx, error, + "%s(non-existent texture %u)", caller, texture); + return false; } - /* check framebuffer binding */ - if (_mesa_is_winsys_fbo(fb)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture%s", caller); - return; + return true; +} + + +/** + * Common code called by gl*FramebufferTexture() to verify the texture target + * and decide whether or not the attachment should truly be considered + * layered. + * + * \param layered true if attachment should be considered layered, false if + * not + * + * \return true if no errors, false if errors + */ +static bool +check_layered_texture_target(struct gl_context *ctx, GLenum target, + const char *caller, GLboolean *layered) +{ + *layered = GL_TRUE; + + switch (target) { + case GL_TEXTURE_3D: + case GL_TEXTURE_1D_ARRAY_EXT: + case GL_TEXTURE_2D_ARRAY_EXT: + case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + return true; + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_RECTANGLE: + case GL_TEXTURE_2D_MULTISAMPLE: + /* These texture types are valid to pass to + * glFramebufferTexture(), but since they aren't layered, it + * is equivalent to calling glFramebufferTexture{1D,2D}(). 
+ */ + *layered = GL_FALSE; + return true; } - /* The textarget, level, and zoffset parameters are only validated if - * texture is non-zero. + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(invalid texture target %s)", caller, + _mesa_lookup_enum_by_nr(target)); + return false; +} + + +/** + * Common code called by gl*FramebufferTextureLayer() to verify the texture + * target. + * + * \return true if no errors, false if errors + */ +static bool +check_texture_target(struct gl_context *ctx, GLenum target, + const char *caller) +{ + /* We're being called by glFramebufferTextureLayer(). + * The only legal texture types for that function are 3D, + * cube-map, and 1D/2D/cube-map array textures. + * + * We don't need to check for GL_ARB_texture_cube_map_array because the + * application wouldn't have been able to create a texture with a + * GL_TEXTURE_CUBE_MAP_ARRAY target if the extension were not enabled. */ - if (texture) { - GLboolean err = GL_TRUE; - - texObj = _mesa_lookup_texture(ctx, texture); - if (texObj != NULL) { - if (textarget == 0) { - if (layered) { - /* We're being called by glFramebufferTexture() and textarget - * is not used. - */ - switch (texObj->Target) { - case GL_TEXTURE_3D: - case GL_TEXTURE_1D_ARRAY_EXT: - case GL_TEXTURE_2D_ARRAY_EXT: - case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_CUBE_MAP_ARRAY: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - err = false; - break; - case GL_TEXTURE_1D: - case GL_TEXTURE_2D: - case GL_TEXTURE_RECTANGLE: - case GL_TEXTURE_2D_MULTISAMPLE: - /* These texture types are valid to pass to - * glFramebufferTexture(), but since they aren't layered, it - * is equivalent to calling glFramebufferTexture{1D,2D}(). - */ - err = false; - layered = false; - textarget = texObj->Target; - break; - default: - err = true; - break; - } - } else { - /* We're being called by glFramebufferTextureLayer() and - * textarget is not used. The only legal texture types for - * that function are 3D and 1D/2D arrays textures. - */ - err = (texObj->Target != GL_TEXTURE_3D) && - (texObj->Target != GL_TEXTURE_1D_ARRAY_EXT) && - (texObj->Target != GL_TEXTURE_2D_ARRAY_EXT) && - (texObj->Target != GL_TEXTURE_CUBE_MAP_ARRAY) && - (texObj->Target != GL_TEXTURE_2D_MULTISAMPLE_ARRAY); - } - } - else { - /* Make sure textarget is consistent with the texture's type */ - err = (texObj->Target == GL_TEXTURE_CUBE_MAP) - ? !_mesa_is_cube_face(textarget) - : (texObj->Target != textarget); - } + switch (target) { + case GL_TEXTURE_3D: + case GL_TEXTURE_1D_ARRAY: + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_CUBE_MAP_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + return true; + case GL_TEXTURE_CUBE_MAP: + /* We don't need to check the extension (GL_ARB_direct_state_access) or + * GL version (4.5) for GL_TEXTURE_CUBE_MAP because DSA is always + * enabled in core profile. This can be called from + * _mesa_FramebufferTextureLayer in compatibility profile (OpenGL 3.0), + * so we do have to check the profile. + */ + return ctx->API == API_OPENGL_CORE; + } + + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(invalid texture target %s)", caller, + _mesa_lookup_enum_by_nr(target)); + return false; +} + + +/** + * Common code called by glFramebufferTexture*D() to verify the texture + * target. 
+ * + * \return true if no errors, false if errors + */ +static bool +check_textarget(struct gl_context *ctx, int dims, GLenum target, + GLenum textarget, const char *caller) +{ + bool err = false; + + switch (dims) { + case 1: + switch (textarget) { + case GL_TEXTURE_1D: + break; + case GL_TEXTURE_1D_ARRAY: + err = !ctx->Extensions.EXT_texture_array; + break; + default: + err = true; } - else { - /* can't render to a non-existant texture */ - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture%s(non existant texture)", - caller); - return; + break; + case 2: + switch (textarget) { + case GL_TEXTURE_2D: + break; + case GL_TEXTURE_RECTANGLE: + err = _mesa_is_gles(ctx) + || !ctx->Extensions.NV_texture_rectangle; + break; + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + err = !ctx->Extensions.ARB_texture_cube_map; + break; + case GL_TEXTURE_2D_ARRAY: + err = (_mesa_is_gles(ctx) && ctx->Version < 30) + || !ctx->Extensions.EXT_texture_array; + break; + case GL_TEXTURE_2D_MULTISAMPLE: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + err = _mesa_is_gles(ctx) + || !ctx->Extensions.ARB_texture_multisample; + break; + default: + err = true; } + break; + case 3: + if (textarget != GL_TEXTURE_3D) + err = true; + break; + default: + err = true; + } - if (err) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture%s(texture target mismatch)", - caller); - return; - } + if (err) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(invalid textarget %s)", + caller, _mesa_lookup_enum_by_nr(textarget)); + return false; + } - if (texObj->Target == GL_TEXTURE_3D) { - const GLuint maxSize = 1 << (ctx->Const.Max3DTextureLevels - 1); - if (zoffset >= maxSize) { - _mesa_error(ctx, GL_INVALID_VALUE, - "glFramebufferTexture%s(zoffset)", caller); - return; - } + /* Make sure textarget is consistent with the texture's type */ + err = (target == GL_TEXTURE_CUBE_MAP) ? + !_mesa_is_cube_face(textarget): (target != textarget); + + if (err) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(mismatched texture target)", caller); + return false; + } + + return true; +} + + +/** + * Common code called by gl*FramebufferTextureLayer() and + * glFramebufferTexture3D() to validate the layer. + * + * \return true if no errors, false if errors + */ +static bool +check_layer(struct gl_context *ctx, GLenum target, GLint layer, + const char *caller) +{ + /* Page 306 (page 328 of the PDF) of the OpenGL 4.5 (Core Profile) + * spec says: + * + * "An INVALID_VALUE error is generated if texture is non-zero + * and layer is negative." 
+ */ + if (layer < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(layer %u < 0)", caller, layer); + return false; + } + + if (target == GL_TEXTURE_3D) { + const GLuint maxSize = 1 << (ctx->Const.Max3DTextureLevels - 1); + if (layer >= maxSize) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(invalid layer %u)", caller, layer); + return false; } - else if ((texObj->Target == GL_TEXTURE_1D_ARRAY_EXT) || - (texObj->Target == GL_TEXTURE_2D_ARRAY_EXT) || - (texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) || - (texObj->Target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)) { - if (zoffset >= ctx->Const.MaxArrayTextureLayers) { - _mesa_error(ctx, GL_INVALID_VALUE, - "glFramebufferTexture%s(layer)", caller); - return; - } + } + else if ((target == GL_TEXTURE_1D_ARRAY) || + (target == GL_TEXTURE_2D_ARRAY) || + (target == GL_TEXTURE_CUBE_MAP_ARRAY) || + (target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)) { + if (layer >= ctx->Const.MaxArrayTextureLayers) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(layer %u >= GL_MAX_ARRAY_TEXTURE_LAYERS)", + caller, layer); + return false; } - - maxLevelsTarget = textarget ? textarget : texObj->Target; - if ((level < 0) || - (level >= _mesa_max_texture_levels(ctx, maxLevelsTarget))) { + } + else if (target == GL_TEXTURE_CUBE_MAP) { + if (layer >= 6) { _mesa_error(ctx, GL_INVALID_VALUE, - "glFramebufferTexture%s(level)", caller); - return; + "%s(layer %u >= 6)", caller, layer); + return false; } } + return true; +} + + +/** + * Common code called by all gl*FramebufferTexture*() entry points to verify + * the level. + * + * \return true if no errors, false if errors + */ +static bool +check_level(struct gl_context *ctx, GLenum target, GLint level, + const char *caller) +{ + if ((level < 0) || + (level >= _mesa_max_texture_levels(ctx, target))) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(invalid level %d)", caller, level); + return false; + } + + return true; +} + + +void +_mesa_framebuffer_texture(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum attachment, + struct gl_texture_object *texObj, GLenum textarget, + GLint level, GLuint layer, GLboolean layered, + const char *caller) +{ + struct gl_renderbuffer_attachment *att; + + /* The window-system framebuffer object is immutable */ + if (_mesa_is_winsys_fbo(fb)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(window-system framebuffer)", + caller); + return; + } + + /* Not a hash lookup, so we can afford to get the attachment here. */ att = get_attachment(ctx, fb, attachment); if (att == NULL) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glFramebufferTexture%s(attachment)", caller); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", caller, + _mesa_lookup_enum_by_nr(attachment)); return; } @@ -2637,7 +3087,7 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, level == fb->Attachment[BUFFER_STENCIL].TextureLevel && _mesa_tex_target_to_face(textarget) == fb->Attachment[BUFFER_STENCIL].CubeMapFace && - zoffset == fb->Attachment[BUFFER_STENCIL].Zoffset) { + layer == fb->Attachment[BUFFER_STENCIL].Zoffset) { /* The texture object is already attached to the stencil attachment * point. Don't create a new renderbuffer; just reuse the stencil * attachment's. 
This is required to prevent a GL error in @@ -2650,13 +3100,14 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, level == fb->Attachment[BUFFER_DEPTH].TextureLevel && _mesa_tex_target_to_face(textarget) == fb->Attachment[BUFFER_DEPTH].CubeMapFace && - zoffset == fb->Attachment[BUFFER_DEPTH].Zoffset) { + layer == fb->Attachment[BUFFER_DEPTH].Zoffset) { /* As above, but with depth and stencil transposed. */ reuse_framebuffer_texture_attachment(fb, BUFFER_STENCIL, BUFFER_DEPTH); } else { set_texture_attachment(ctx, fb, att, texObj, textarget, - level, zoffset, layered); + level, layer, layered); + if (attachment == GL_DEPTH_STENCIL_ATTACHMENT) { /* Above we created a new renderbuffer and attached it to the * depth attachment point. Now attach it to the stencil attachment @@ -2692,116 +3143,157 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, } -void GLAPIENTRY -_mesa_FramebufferTexture1D(GLenum target, GLenum attachment, - GLenum textarget, GLuint texture, GLint level) +static void +framebuffer_texture_with_dims(int dims, GLenum target, + GLenum attachment, GLenum textarget, + GLuint texture, GLint level, GLint layer, + const char *caller) { GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + struct gl_texture_object *texObj; + + /* Get the framebuffer object */ + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", caller, + _mesa_lookup_enum_by_nr(target)); + return; + } - if (texture != 0) { - GLboolean error; + /* Get the texture object */ + if (!get_texture_for_framebuffer(ctx, texture, false, caller, &texObj)) + return; - switch (textarget) { - case GL_TEXTURE_1D: - error = GL_FALSE; - break; - case GL_TEXTURE_1D_ARRAY: - error = !ctx->Extensions.EXT_texture_array; - break; - default: - error = GL_TRUE; - } + if (texObj) { + if (!check_textarget(ctx, dims, texObj->Target, textarget, caller)) + return; - if (error) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture1D(textarget=%s)", - _mesa_lookup_enum_by_nr(textarget)); + if ((dims == 3) && !check_layer(ctx, texObj->Target, layer, caller)) return; - } } - framebuffer_texture(ctx, "1D", target, attachment, textarget, texture, - level, 0, GL_FALSE); + if (!check_level(ctx, textarget, level, caller)) + return; + + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, textarget, level, + layer, GL_FALSE, caller); } void GLAPIENTRY -_mesa_FramebufferTexture2D(GLenum target, GLenum attachment, +_mesa_FramebufferTexture1D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level) { - GET_CURRENT_CONTEXT(ctx); - - if (texture != 0) { - GLboolean error; - - switch (textarget) { - case GL_TEXTURE_2D: - error = GL_FALSE; - break; - case GL_TEXTURE_RECTANGLE: - error = _mesa_is_gles(ctx) - || !ctx->Extensions.NV_texture_rectangle; - break; - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - error = !ctx->Extensions.ARB_texture_cube_map; - break; - case GL_TEXTURE_2D_ARRAY: - error = (_mesa_is_gles(ctx) && ctx->Version < 30) - || !ctx->Extensions.EXT_texture_array; - break; - case GL_TEXTURE_2D_MULTISAMPLE: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - error = _mesa_is_gles(ctx) - || !ctx->Extensions.ARB_texture_multisample; - break; - default: - error = GL_TRUE; - } + framebuffer_texture_with_dims(1, 
target, attachment, textarget, texture, + level, 0, "glFramebufferTexture1D"); +} - if (error) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture2D(textarget=%s)", - _mesa_lookup_enum_by_nr(textarget)); - return; - } - } - framebuffer_texture(ctx, "2D", target, attachment, textarget, texture, - level, 0, GL_FALSE); +void GLAPIENTRY +_mesa_FramebufferTexture2D(GLenum target, GLenum attachment, + GLenum textarget, GLuint texture, GLint level) +{ + framebuffer_texture_with_dims(2, target, attachment, textarget, texture, + level, 0, "glFramebufferTexture2D"); } void GLAPIENTRY _mesa_FramebufferTexture3D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, - GLint level, GLint zoffset) + GLint level, GLint layer) +{ + framebuffer_texture_with_dims(3, target, attachment, textarget, texture, + level, layer, "glFramebufferTexture3D"); +} + + +void GLAPIENTRY +_mesa_FramebufferTextureLayer(GLenum target, GLenum attachment, + GLuint texture, GLint level, GLint layer) { GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + struct gl_texture_object *texObj; + GLenum textarget = 0; - if ((texture != 0) && (textarget != GL_TEXTURE_3D)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture3D(textarget)"); + const char *func = "glFramebufferTextureLayer"; + + /* Get the framebuffer object */ + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glFramebufferTextureLayer(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); return; } - framebuffer_texture(ctx, "3D", target, attachment, textarget, texture, - level, zoffset, GL_FALSE); + /* Get the texture object */ + if (!get_texture_for_framebuffer(ctx, texture, false, func, &texObj)) + return; + + if (texObj) { + if (!check_texture_target(ctx, texObj->Target, func)) + return; + + if (!check_layer(ctx, texObj->Target, layer, func)) + return; + + if (!check_level(ctx, texObj->Target, level, func)) + return; + + if (texObj->Target == GL_TEXTURE_CUBE_MAP) { + assert(layer >= 0 && layer < 6); + textarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + layer; + layer = 0; + } + } + + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, textarget, level, + layer, GL_FALSE, func); } void GLAPIENTRY -_mesa_FramebufferTextureLayer(GLenum target, GLenum attachment, - GLuint texture, GLint level, GLint layer) +_mesa_NamedFramebufferTextureLayer(GLuint framebuffer, GLenum attachment, + GLuint texture, GLint level, GLint layer) { GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + struct gl_texture_object *texObj; + GLenum textarget = 0; + + const char *func = "glNamedFramebufferTextureLayer"; + + /* Get the framebuffer object */ + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, func); + if (!fb) + return; + + /* Get the texture object */ + if (!get_texture_for_framebuffer(ctx, texture, false, func, &texObj)) + return; + + if (texObj) { + if (!check_texture_target(ctx, texObj->Target, func)) + return; - framebuffer_texture(ctx, "Layer", target, attachment, 0, texture, - level, layer, GL_FALSE); + if (!check_layer(ctx, texObj->Target, layer, func)) + return; + + if (!check_level(ctx, texObj->Target, level, func)) + return; + + if (texObj->Target == GL_TEXTURE_CUBE_MAP) { + assert(layer >= 0 && layer < 6); + textarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + layer; + layer = 0; + } + } + + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, textarget, level, + layer, GL_FALSE, func); } @@ -2810,82 +3302,115 @@ _mesa_FramebufferTexture(GLenum target, GLenum attachment, GLuint texture, 
GLint level) { GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + struct gl_texture_object *texObj; + GLboolean layered; - if (_mesa_has_geometry_shaders(ctx)) { - framebuffer_texture(ctx, "", target, attachment, 0, texture, - level, 0, GL_TRUE); - } else { + const char *func = "FramebufferTexture"; + + if (!_mesa_has_geometry_shaders(ctx)) { _mesa_error(ctx, GL_INVALID_OPERATION, "unsupported function (glFramebufferTexture) called"); + return; } + + /* Get the framebuffer object */ + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glFramebufferTexture(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return; + } + + /* Get the texture object */ + if (!get_texture_for_framebuffer(ctx, texture, true, func, &texObj)) + return; + + if (texObj) { + if (!check_layered_texture_target(ctx, texObj->Target, func, &layered)) + return; + + if (!check_level(ctx, texObj->Target, level, func)) + return; + } + + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, 0, level, + 0, layered, func); } void GLAPIENTRY -_mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment, - GLenum renderbufferTarget, - GLuint renderbuffer) +_mesa_NamedFramebufferTexture(GLuint framebuffer, GLenum attachment, + GLuint texture, GLint level) { - struct gl_renderbuffer_attachment *att; - struct gl_framebuffer *fb; - struct gl_renderbuffer *rb; GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + struct gl_texture_object *texObj; + GLboolean layered; - fb = get_framebuffer_target(ctx, target); - if (!fb) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glFramebufferRenderbuffer(target)"); + const char *func = "glNamedFramebufferTexture"; + + if (!_mesa_has_geometry_shaders(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "unsupported function (glNamedFramebufferTexture) called"); return; } - if (renderbufferTarget != GL_RENDERBUFFER_EXT) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glFramebufferRenderbuffer(renderbufferTarget)"); + /* Get the framebuffer object */ + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, func); + if (!fb) return; + + /* Get the texture object */ + if (!get_texture_for_framebuffer(ctx, texture, true, func, &texObj)) + return; + + if (texObj) { + if (!check_layered_texture_target(ctx, texObj->Target, func, + &layered)) + return; + + if (!check_level(ctx, texObj->Target, level, func)) + return; } + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, 0, level, + 0, layered, func); +} + + +void +_mesa_framebuffer_renderbuffer(struct gl_context *ctx, + struct gl_framebuffer *fb, + GLenum attachment, + struct gl_renderbuffer *rb, + const char *func) +{ + struct gl_renderbuffer_attachment *att; + if (_mesa_is_winsys_fbo(fb)) { /* Can't attach new renderbuffers to a window system framebuffer */ - _mesa_error(ctx, GL_INVALID_OPERATION, "glFramebufferRenderbuffer"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(window-system framebuffer)", func); return; } att = get_attachment(ctx, fb, attachment); if (att == NULL) { _mesa_error(ctx, GL_INVALID_ENUM, - "glFramebufferRenderbuffer(invalid attachment %s)", + "%s(invalid attachment %s)", func, _mesa_lookup_enum_by_nr(attachment)); return; } - if (renderbuffer) { - rb = _mesa_lookup_renderbuffer(ctx, renderbuffer); - if (!rb) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferRenderbuffer(non-existant" - " renderbuffer %u)", renderbuffer); - return; - } - else if (rb == &DummyRenderbuffer) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferRenderbuffer(renderbuffer %u)", - 
renderbuffer); - return; - } - } - else { - /* remove renderbuffer attachment */ - rb = NULL; - } - if (attachment == GL_DEPTH_STENCIL_ATTACHMENT && rb && rb->Format != MESA_FORMAT_NONE) { /* make sure the renderbuffer is a depth/stencil format */ const GLenum baseFormat = _mesa_get_format_base_format(rb->Format); if (baseFormat != GL_DEPTH_STENCIL) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferRenderbuffer(renderbuffer" - " is not DEPTH_STENCIL format)"); + "%s(renderbuffer is not DEPTH_STENCIL format)", func); return; } } @@ -2903,24 +3428,94 @@ _mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment, void GLAPIENTRY -_mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, - GLenum pname, GLint *params) +_mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment, + GLenum renderbuffertarget, + GLuint renderbuffer) { - const struct gl_renderbuffer_attachment *att; - struct gl_framebuffer *buffer; - GLenum err; + struct gl_framebuffer *fb; + struct gl_renderbuffer *rb; GET_CURRENT_CONTEXT(ctx); - /* The error differs in GL and GLES. */ - err = _mesa_is_desktop_gl(ctx) ? GL_INVALID_OPERATION : GL_INVALID_ENUM; + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glFramebufferRenderbuffer(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return; + } - buffer = get_framebuffer_target(ctx, target); - if (!buffer) { + if (renderbuffertarget != GL_RENDERBUFFER) { _mesa_error(ctx, GL_INVALID_ENUM, - "glGetFramebufferAttachmentParameteriv(target)"); + "glFramebufferRenderbuffer(renderbuffertarget is not " + "GL_RENDERBUFFER)"); return; } + if (renderbuffer) { + rb = _mesa_lookup_renderbuffer_err(ctx, renderbuffer, + "glFramebufferRenderbuffer"); + if (!rb) + return; + } + else { + /* remove renderbuffer attachment */ + rb = NULL; + } + + _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb, + "glFramebufferRenderbuffer"); +} + + +void GLAPIENTRY +_mesa_NamedFramebufferRenderbuffer(GLuint framebuffer, GLenum attachment, + GLenum renderbuffertarget, + GLuint renderbuffer) +{ + struct gl_framebuffer *fb; + struct gl_renderbuffer *rb; + GET_CURRENT_CONTEXT(ctx); + + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glNamedFramebufferRenderbuffer"); + if (!fb) + return; + + if (renderbuffertarget != GL_RENDERBUFFER) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glNamedFramebufferRenderbuffer(renderbuffertarget is not " + "GL_RENDERBUFFER)"); + return; + } + + if (renderbuffer) { + rb = _mesa_lookup_renderbuffer_err(ctx, renderbuffer, + "glNamedFramebufferRenderbuffer"); + if (!rb) + return; + } + else { + /* remove renderbuffer attachment */ + rb = NULL; + } + + _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb, + "glNamedFramebufferRenderbuffer"); +} + + +void +_mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx, + struct gl_framebuffer *buffer, + GLenum attachment, GLenum pname, + GLint *params, const char *caller) +{ + const struct gl_renderbuffer_attachment *att; + GLenum err; + + /* The error differs in GL and GLES. */ + err = _mesa_is_desktop_gl(ctx) ? 
GL_INVALID_OPERATION : GL_INVALID_ENUM; + if (_mesa_is_winsys_fbo(buffer)) { /* Page 126 (page 136 of the PDF) of the OpenGL ES 2.0.25 spec * says: @@ -2936,14 +3531,15 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, !ctx->Extensions.ARB_framebuffer_object) && !_mesa_is_gles3(ctx)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetFramebufferAttachmentParameteriv(bound FBO = 0)"); + "%s(window-system framebuffer)", caller); return; } if (_mesa_is_gles3(ctx) && attachment != GL_BACK && attachment != GL_DEPTH && attachment != GL_STENCIL) { _mesa_error(ctx, GL_INVALID_ENUM, - "glGetFramebufferAttachmentParameteriv(attachment)"); + "%s(invalid attachment %s)", caller, + _mesa_lookup_enum_by_nr(attachment)); return; } /* the default / window-system FBO */ @@ -2955,8 +3551,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, } if (att == NULL) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glGetFramebufferAttachmentParameteriv(attachment)"); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", caller, + _mesa_lookup_enum_by_nr(attachment)); return; } @@ -2970,9 +3566,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, * attachment, since it does not have a single format." */ _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetFramebufferAttachmentParameteriv(" - "GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE" - " is invalid for depth+stencil attachment)"); + "%s(GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE" + " is invalid for depth+stencil attachment)", caller); return; } /* the depth and stencil attachments must point to the same buffer */ @@ -2980,8 +3575,7 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, stencilAtt = get_attachment(ctx, buffer, GL_STENCIL_ATTACHMENT); if (depthAtt->Renderbuffer != stencilAtt->Renderbuffer) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetFramebufferAttachmentParameteriv(DEPTH/STENCIL" - " attachments differ)"); + "%s(DEPTH/STENCIL attachments differ)", caller); return; } } @@ -3014,8 +3608,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, *params = att->TextureLevel; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, err, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + _mesa_lookup_enum_by_nr(pname)); } else { goto invalid_pname_enum; @@ -3031,8 +3625,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, } } else if (att->Type == GL_NONE) { - _mesa_error(ctx, err, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + _mesa_lookup_enum_by_nr(pname)); } else { goto invalid_pname_enum; @@ -3042,8 +3636,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, if (ctx->API == API_OPENGLES) { goto invalid_pname_enum; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, err, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + _mesa_lookup_enum_by_nr(pname)); } else if (att->Type == GL_TEXTURE) { if (att->Texture && (att->Texture->Target == GL_TEXTURE_3D || att->Texture->Target == GL_TEXTURE_2D_ARRAY)) { @@ -3064,8 +3658,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, goto invalid_pname_enum; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, err, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + 
_mesa_lookup_enum_by_nr(pname)); } else { if (ctx->Extensions.EXT_framebuffer_sRGB) { @@ -3087,8 +3681,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, goto invalid_pname_enum; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, err, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + _mesa_lookup_enum_by_nr(pname)); } else { mesa_format format = att->Renderbuffer->Format; @@ -3103,9 +3697,9 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, if (_mesa_is_gles3(ctx) && attachment == GL_DEPTH_STENCIL_ATTACHMENT) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetFramebufferAttachmentParameteriv(cannot query " + "%s(cannot query " "GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE of " - "GL_DEPTH_STENCIL_ATTACHMENT"); + "GL_DEPTH_STENCIL_ATTACHMENT)", caller); return; } @@ -3139,8 +3733,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, goto invalid_pname_enum; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, err, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + _mesa_lookup_enum_by_nr(pname)); } else if (att->Texture) { const struct gl_texture_image *texImage = @@ -3159,8 +3753,7 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, att->Renderbuffer->Format); } else { - _mesa_problem(ctx, "glGetFramebufferAttachmentParameterivEXT:" - " invalid FBO attachment structure"); + _mesa_problem(ctx, "%s: invalid FBO attachment structure", caller); } return; case GL_FRAMEBUFFER_ATTACHMENT_LAYERED: @@ -3169,8 +3762,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, } else if (att->Type == GL_TEXTURE) { *params = att->Layered; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, err, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + _mesa_lookup_enum_by_nr(pname)); } else { goto invalid_pname_enum; } @@ -3182,30 +3775,144 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, return; invalid_pname_enum: - _mesa_error(ctx, GL_INVALID_ENUM, - "glGetFramebufferAttachmentParameteriv(pname)"); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid pname %s)", caller, + _mesa_lookup_enum_by_nr(pname)); return; } +void GLAPIENTRY +_mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, + GLenum pname, GLint *params) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *buffer; + + buffer = get_framebuffer_target(ctx, target); + if (!buffer) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glGetFramebufferAttachmentParameteriv(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return; + } + + _mesa_get_framebuffer_attachment_parameter(ctx, buffer, attachment, pname, + params, + "glGetFramebufferAttachmentParameteriv"); +} + + +void GLAPIENTRY +_mesa_GetNamedFramebufferAttachmentParameteriv(GLuint framebuffer, + GLenum attachment, + GLenum pname, GLint *params) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *buffer; + + if (framebuffer) { + buffer = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glGetNamedFramebufferAttachmentParameteriv"); + if (!buffer) + return; + } + else { + /* + * Section 9.2 Binding and Managing Framebuffer Objects of the OpenGL + * 4.5 core spec (30.10.2014, PDF page 314): + * "If framebuffer is zero, then the default draw framebuffer is + * queried." 
+ */ + buffer = ctx->WinSysDrawBuffer; + } + + _mesa_get_framebuffer_attachment_parameter(ctx, buffer, attachment, pname, + params, + "glGetNamedFramebufferAttachmentParameteriv"); +} + + +void GLAPIENTRY +_mesa_NamedFramebufferParameteri(GLuint framebuffer, GLenum pname, + GLint param) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb = NULL; + + if (!ctx->Extensions.ARB_framebuffer_no_attachments) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glNamedFramebufferParameteri(" + "ARB_framebuffer_no_attachments not implemented)"); + return; + } + + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glNamedFramebufferParameteri"); + + if (fb) { + framebuffer_parameteri(ctx, fb, pname, param, + "glNamedFramebufferParameteriv"); + } +} + + +void GLAPIENTRY +_mesa_GetNamedFramebufferParameteriv(GLuint framebuffer, GLenum pname, + GLint *param) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + + if (!ctx->Extensions.ARB_framebuffer_no_attachments) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glNamedFramebufferParameteriv(" + "ARB_framebuffer_no_attachments not implemented)"); + return; + } + + if (framebuffer) { + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glGetNamedFramebufferParameteriv"); + } else { + fb = ctx->WinSysDrawBuffer; + } + + if (fb) { + get_framebuffer_parameteriv(ctx, fb, pname, param, + "glGetNamedFramebufferParameteriv"); + } +} + + static void -invalidate_framebuffer_storage(GLenum target, GLsizei numAttachments, +invalidate_framebuffer_storage(struct gl_context *ctx, + struct gl_framebuffer *fb, + GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height, const char *name) { int i; - struct gl_framebuffer *fb; - GET_CURRENT_CONTEXT(ctx); - fb = get_framebuffer_target(ctx, target); - if (!fb) { - _mesa_error(ctx, GL_INVALID_ENUM, "%s(target)", name); + /* Section 17.4 Whole Framebuffer Operations of the OpenGL 4.5 Core + * Spec (2.2.2015, PDF page 522) says: + * "An INVALID_VALUE error is generated if numAttachments, width, or + * height is negative." 
+ */ + if (numAttachments < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(numAttachments < 0)", name); return; } - if (numAttachments < 0) { + if (width < 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "%s(numAttachments < 0)", name); + "%s(width < 0)", name); + return; + } + + if (height < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(height < 0)", name); return; } @@ -3301,7 +4008,8 @@ invalidate_framebuffer_storage(GLenum target, GLsizei numAttachments, return; invalid_enum: - _mesa_error(ctx, GL_INVALID_ENUM, "%s(attachment)", name); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", name, + _mesa_lookup_enum_by_nr(attachments[i])); return; } @@ -3311,16 +4019,67 @@ _mesa_InvalidateSubFramebuffer(GLenum target, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height) { - invalidate_framebuffer_storage(target, numAttachments, attachments, + struct gl_framebuffer *fb; + GET_CURRENT_CONTEXT(ctx); + + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glInvalidateSubFramebuffer(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return; + } + + invalidate_framebuffer_storage(ctx, fb, numAttachments, attachments, x, y, width, height, "glInvalidateSubFramebuffer"); } void GLAPIENTRY +_mesa_InvalidateNamedFramebufferSubData(GLuint framebuffer, + GLsizei numAttachments, + const GLenum *attachments, + GLint x, GLint y, + GLsizei width, GLsizei height) +{ + struct gl_framebuffer *fb; + GET_CURRENT_CONTEXT(ctx); + + /* The OpenGL 4.5 core spec (02.02.2015) says (in Section 17.4 Whole + * Framebuffer Operations, PDF page 522): "If framebuffer is zero, the + * default draw framebuffer is affected." + */ + if (framebuffer) { + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glInvalidateNamedFramebufferSubData"); + if (!fb) + return; + } + else + fb = ctx->WinSysDrawBuffer; + + invalidate_framebuffer_storage(ctx, fb, numAttachments, attachments, + x, y, width, height, + "glInvalidateNamedFramebufferSubData"); +} + + +void GLAPIENTRY _mesa_InvalidateFramebuffer(GLenum target, GLsizei numAttachments, const GLenum *attachments) { + struct gl_framebuffer *fb; + GET_CURRENT_CONTEXT(ctx); + + fb = get_framebuffer_target(ctx, target); + if (!fb) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glInvalidateFramebuffer(invalid target %s)", + _mesa_lookup_enum_by_nr(target)); + return; + } + /* The GL_ARB_invalidate_subdata spec says: * * "The command @@ -3333,7 +4092,7 @@ _mesa_InvalidateFramebuffer(GLenum target, GLsizei numAttachments, * <width>, <height> equal to 0, 0, <MAX_VIEWPORT_DIMS[0]>, * <MAX_VIEWPORT_DIMS[1]> respectively." */ - invalidate_framebuffer_storage(target, numAttachments, attachments, + invalidate_framebuffer_storage(ctx, fb, numAttachments, attachments, 0, 0, MAX_VIEWPORT_WIDTH, MAX_VIEWPORT_HEIGHT, "glInvalidateFramebuffer"); @@ -3341,6 +4100,46 @@ _mesa_InvalidateFramebuffer(GLenum target, GLsizei numAttachments, void GLAPIENTRY +_mesa_InvalidateNamedFramebufferData(GLuint framebuffer, + GLsizei numAttachments, + const GLenum *attachments) +{ + struct gl_framebuffer *fb; + GET_CURRENT_CONTEXT(ctx); + + /* The OpenGL 4.5 core spec (02.02.2015) says (in Section 17.4 Whole + * Framebuffer Operations, PDF page 522): "If framebuffer is zero, the + * default draw framebuffer is affected." 
+ */ + if (framebuffer) { + fb = _mesa_lookup_framebuffer_err(ctx, framebuffer, + "glInvalidateNamedFramebufferData"); + if (!fb) + return; + } + else + fb = ctx->WinSysDrawBuffer; + + /* The GL_ARB_invalidate_subdata spec says: + * + * "The command + * + * void InvalidateFramebuffer(enum target, + * sizei numAttachments, + * const enum *attachments); + * + * is equivalent to the command InvalidateSubFramebuffer with <x>, <y>, + * <width>, <height> equal to 0, 0, <MAX_VIEWPORT_DIMS[0]>, + * <MAX_VIEWPORT_DIMS[1]> respectively." + */ + invalidate_framebuffer_storage(ctx, fb, numAttachments, attachments, + 0, 0, + MAX_VIEWPORT_WIDTH, MAX_VIEWPORT_HEIGHT, + "glInvalidateNamedFramebufferData"); +} + + +void GLAPIENTRY _mesa_DiscardFramebufferEXT(GLenum target, GLsizei numAttachments, const GLenum *attachments) { diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 61aa1f50308..8dad0ff34e7 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -64,9 +64,17 @@ _mesa_get_incomplete_framebuffer(void); extern struct gl_renderbuffer * _mesa_lookup_renderbuffer(struct gl_context *ctx, GLuint id); +extern struct gl_renderbuffer * +_mesa_lookup_renderbuffer_err(struct gl_context *ctx, GLuint id, + const char *func); + extern struct gl_framebuffer * _mesa_lookup_framebuffer(struct gl_context *ctx, GLuint id); +extern struct gl_framebuffer * +_mesa_lookup_framebuffer_err(struct gl_context *ctx, GLuint id, + const char *func); + void _mesa_update_texture_renderbuffer(struct gl_context *ctx, @@ -74,9 +82,17 @@ _mesa_update_texture_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer_attachment *att); extern void +_mesa_FramebufferRenderbuffer_sw(struct gl_context *ctx, + struct gl_framebuffer *fb, + GLenum attachment, + struct gl_renderbuffer *rb); + +extern void _mesa_framebuffer_renderbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, - GLenum attachment, struct gl_renderbuffer *rb); + GLenum attachment, + struct gl_renderbuffer *rb, + const char *func); extern void _mesa_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb); @@ -99,6 +115,24 @@ _mesa_detach_renderbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, const void *att); +extern void +_mesa_framebuffer_texture(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum attachment, + struct gl_texture_object *texObj, GLenum textarget, + GLint level, GLuint layer, GLboolean layered, + const char *caller); + +extern GLenum +_mesa_check_framebuffer_status(struct gl_context *ctx, + struct gl_framebuffer *fb); + +extern void +_mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx, + struct gl_framebuffer *buffer, + GLenum attachment, GLenum pname, + GLint *params, const char *caller); + + extern GLboolean GLAPIENTRY _mesa_IsRenderbuffer(GLuint renderbuffer); @@ -165,9 +199,15 @@ _mesa_DeleteFramebuffers(GLsizei n, const GLuint *framebuffers); extern void GLAPIENTRY _mesa_GenFramebuffers(GLsizei n, GLuint *framebuffers); +extern void GLAPIENTRY +_mesa_CreateFramebuffers(GLsizei n, GLuint *framebuffers); + extern GLenum GLAPIENTRY _mesa_CheckFramebufferStatus(GLenum target); +extern GLenum GLAPIENTRY +_mesa_CheckNamedFramebufferStatus(GLuint framebuffer, GLenum target); + extern void GLAPIENTRY _mesa_FramebufferTexture1D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); @@ -179,24 +219,49 @@ _mesa_FramebufferTexture2D(GLenum target, GLenum attachment, extern void GLAPIENTRY _mesa_FramebufferTexture3D(GLenum target, GLenum attachment, 
GLenum textarget, GLuint texture, - GLint level, GLint zoffset); + GLint level, GLint layer); extern void GLAPIENTRY _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer); extern void GLAPIENTRY +_mesa_NamedFramebufferTextureLayer(GLuint framebuffer, GLenum attachment, + GLuint texture, GLint level, GLint layer); + +extern void GLAPIENTRY _mesa_FramebufferTexture(GLenum target, GLenum attachment, GLuint texture, GLint level); extern void GLAPIENTRY +_mesa_NamedFramebufferTexture(GLuint framebuffer, GLenum attachment, + GLuint texture, GLint level); + +extern void GLAPIENTRY _mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); extern void GLAPIENTRY +_mesa_NamedFramebufferRenderbuffer(GLuint framebuffer, GLenum attachment, + GLenum renderbuffertarget, + GLuint renderbuffer); + +extern void GLAPIENTRY _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, GLenum pname, GLint *params); +extern void GLAPIENTRY +_mesa_GetNamedFramebufferAttachmentParameteriv(GLuint framebuffer, + GLenum attachment, + GLenum pname, GLint *params); + +extern void GLAPIENTRY +_mesa_NamedFramebufferParameteri(GLuint framebuffer, GLenum pname, + GLint param); + +extern void GLAPIENTRY +_mesa_GetNamedFramebufferParameteriv(GLuint framebuffer, GLenum pname, + GLint *param); extern void GLAPIENTRY _mesa_InvalidateSubFramebuffer(GLenum target, GLsizei numAttachments, @@ -204,11 +269,29 @@ _mesa_InvalidateSubFramebuffer(GLenum target, GLsizei numAttachments, GLsizei width, GLsizei height); extern void GLAPIENTRY +_mesa_InvalidateNamedFramebufferSubData(GLuint framebuffer, + GLsizei numAttachments, + const GLenum *attachments, + GLint x, GLint y, + GLsizei width, GLsizei height); + +extern void GLAPIENTRY _mesa_InvalidateFramebuffer(GLenum target, GLsizei numAttachments, const GLenum *attachments); extern void GLAPIENTRY +_mesa_InvalidateNamedFramebufferData(GLuint framebuffer, + GLsizei numAttachments, + const GLenum *attachments); + +extern void GLAPIENTRY _mesa_DiscardFramebufferEXT(GLenum target, GLsizei numAttachments, const GLenum *attachments); +extern void GLAPIENTRY +_mesa_FramebufferParameteri(GLenum target, GLenum pname, GLint param); + +extern void GLAPIENTRY +_mesa_GetFramebufferParameteriv(GLenum target, GLenum pname, GLint *params); + #endif /* FBOBJECT_H */ diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index 8af44e90520..baeb1bfe5de 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -397,6 +397,11 @@ format_array_format_table_init(void) format_array_format_table = _mesa_hash_table_create(NULL, NULL, array_formats_equal); + if (!format_array_format_table) { + _mesa_error_no_memory(__func__); + return; + } + for (f = 1; f < MESA_FORMAT_COUNT; ++f) { info = _mesa_get_format_info(f); if (!info->ArrayFormat) @@ -432,6 +437,12 @@ _mesa_format_from_array_format(uint32_t array_format) call_once(&format_array_format_table_exists, format_array_format_table_init); + if (!format_array_format_table) { + static const once_flag once_flag_init = ONCE_FLAG_INIT; + format_array_format_table_exists = once_flag_init; + return MESA_FORMAT_NONE; + } + entry = _mesa_hash_table_search_pre_hashed(format_array_format_table, array_format, (void *)(intptr_t)array_format); diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 4f7736a64d0..77c04b8dab8 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -157,6 +157,7 @@ 
_mesa_initialize_window_framebuffer(struct gl_framebuffer *fb, fb->_Status = GL_FRAMEBUFFER_COMPLETE_EXT; fb->_AllColorBuffersFixedPoint = !visual->floatMode; fb->_HasSNormOrFloatColorBuffer = visual->floatMode; + fb->_HasAttachments = true; compute_depth_max(fb); } @@ -312,7 +313,7 @@ _mesa_resize_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb, if (ctx) { /* update scissor / window bounds */ - _mesa_update_draw_buffer_bounds(ctx); + _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); /* Signal new buffer state so that swrast will update its clipping * info (the CLIP_BIT flag). */ @@ -356,30 +357,20 @@ update_framebuffer_size(struct gl_context *ctx, struct gl_framebuffer *fb) } + /** - * Calculate the inclusive bounding box for the scissor of a specific viewport + * Given a bounding box, intersect the bounding box with the scissor of + * a specified vieport. * * \param ctx GL context. - * \param buffer Framebuffer to be checked against * \param idx Index of the desired viewport * \param bbox Bounding box for the scissored viewport. Stored as xmin, * xmax, ymin, ymax. - * - * \warning This function assumes that the framebuffer dimensions are up to - * date (e.g., update_framebuffer_size has been recently called on \c buffer). - * - * \sa _mesa_clip_to_region */ void -_mesa_scissor_bounding_box(const struct gl_context *ctx, - const struct gl_framebuffer *buffer, - unsigned idx, int *bbox) +_mesa_intersect_scissor_bounding_box(const struct gl_context *ctx, + unsigned idx, int *bbox) { - bbox[0] = 0; - bbox[2] = 0; - bbox[1] = buffer->Width; - bbox[3] = buffer->Height; - if (ctx->Scissor.EnableFlags & (1u << idx)) { if (ctx->Scissor.ScissorArray[idx].X > bbox[0]) { bbox[0] = ctx->Scissor.ScissorArray[idx].X; @@ -401,6 +392,33 @@ _mesa_scissor_bounding_box(const struct gl_context *ctx, bbox[2] = bbox[3]; } } +} + +/** + * Calculate the inclusive bounding box for the scissor of a specific viewport + * + * \param ctx GL context. + * \param buffer Framebuffer to be checked against + * \param idx Index of the desired viewport + * \param bbox Bounding box for the scissored viewport. Stored as xmin, + * xmax, ymin, ymax. + * + * \warning This function assumes that the framebuffer dimensions are up to + * date (e.g., update_framebuffer_size has been recently called on \c buffer). + * + * \sa _mesa_clip_to_region + */ +void +_mesa_scissor_bounding_box(const struct gl_context *ctx, + const struct gl_framebuffer *buffer, + unsigned idx, int *bbox) +{ + bbox[0] = 0; + bbox[2] = 0; + bbox[1] = buffer->Width; + bbox[3] = buffer->Height; + + _mesa_intersect_scissor_bounding_box(ctx, idx, bbox); assert(bbox[0] <= bbox[1]); assert(bbox[2] <= bbox[3]); @@ -413,9 +431,9 @@ _mesa_scissor_bounding_box(const struct gl_context *ctx, * \param ctx the GL context. */ void -_mesa_update_draw_buffer_bounds(struct gl_context *ctx) +_mesa_update_draw_buffer_bounds(struct gl_context *ctx, + struct gl_framebuffer *buffer) { - struct gl_framebuffer *buffer = ctx->DrawBuffer; int bbox[4]; if (!buffer) @@ -652,7 +670,7 @@ update_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) * context state (GL_READ_BUFFER too). */ if (fb->ColorDrawBuffer[0] != ctx->Color.DrawBuffer[0]) { - _mesa_drawbuffers(ctx, ctx->Const.MaxDrawBuffers, + _mesa_drawbuffers(ctx, fb, ctx->Const.MaxDrawBuffers, ctx->Color.DrawBuffer, NULL); } } @@ -678,24 +696,21 @@ update_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) /** - * Update state related to the current draw/read framebuffers. 
+ * Update state related to the draw/read framebuffers. */ void -_mesa_update_framebuffer(struct gl_context *ctx) +_mesa_update_framebuffer(struct gl_context *ctx, + struct gl_framebuffer *readFb, + struct gl_framebuffer *drawFb) { - struct gl_framebuffer *drawFb; - struct gl_framebuffer *readFb; - assert(ctx); - drawFb = ctx->DrawBuffer; - readFb = ctx->ReadBuffer; update_framebuffer(ctx, drawFb); if (readFb != drawFb) update_framebuffer(ctx, readFb); - _mesa_update_clamp_vertex_color(ctx); - _mesa_update_clamp_fragment_color(ctx); + _mesa_update_clamp_vertex_color(ctx, drawFb); + _mesa_update_clamp_fragment_color(ctx, drawFb); } diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h index a4274216ec2..08e43222045 100644 --- a/src/mesa/main/framebuffer.h +++ b/src/mesa/main/framebuffer.h @@ -75,16 +75,50 @@ extern void _mesa_scissor_bounding_box(const struct gl_context *ctx, const struct gl_framebuffer *buffer, unsigned idx, int *bbox); +extern void +_mesa_intersect_scissor_bounding_box(const struct gl_context *ctx, + unsigned idx, int *bbox); + +static inline GLuint +_mesa_geometric_width(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ? + buffer->Width : buffer->DefaultGeometry.Width; +} + +static inline GLuint +_mesa_geometric_height(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ? + buffer->Height : buffer->DefaultGeometry.Height; +} + +static inline GLuint +_mesa_geometric_samples(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ? + buffer->Visual.samples : buffer->DefaultGeometry.NumSamples; +} + +static inline GLuint +_mesa_geometric_layers(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ? + buffer->MaxNumLayers : buffer->DefaultGeometry.Layers; +} extern void -_mesa_update_draw_buffer_bounds(struct gl_context *ctx); +_mesa_update_draw_buffer_bounds(struct gl_context *ctx, + struct gl_framebuffer *drawFb); extern void _mesa_update_framebuffer_visual(struct gl_context *ctx, struct gl_framebuffer *fb); extern void -_mesa_update_framebuffer(struct gl_context *ctx); +_mesa_update_framebuffer(struct gl_context *ctx, + struct gl_framebuffer *readFb, + struct gl_framebuffer *drawFb); extern GLboolean _mesa_source_buffer_exists(struct gl_context *ctx, GLenum format); diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index a881bc589ba..3d6d63916b3 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -138,6 +138,7 @@ enum value_extra { EXTRA_API_GL_CORE, EXTRA_API_ES2, EXTRA_API_ES3, + EXTRA_API_ES31, EXTRA_NEW_BUFFERS, EXTRA_NEW_FRAG_CLAMP, EXTRA_VALID_DRAW_BUFFER, @@ -348,6 +349,12 @@ static const int extra_ARB_shader_image_load_store_and_geometry_shader[] = { EXTRA_END }; +static const int extra_ARB_draw_indirect_es31[] = { + EXT(ARB_draw_indirect), + EXTRA_API_ES31, + EXTRA_END +}; + EXTRA_EXT(ARB_texture_cube_map); EXTRA_EXT(EXT_texture_array); EXTRA_EXT(NV_fog_distance); @@ -393,6 +400,7 @@ EXTRA_EXT(INTEL_performance_query); EXTRA_EXT(ARB_explicit_uniform_location); EXTRA_EXT(ARB_clip_control); EXTRA_EXT(EXT_polygon_offset_clamp); +EXTRA_EXT(ARB_framebuffer_no_attachments); static const int extra_ARB_color_buffer_float_or_glcore[] = { @@ -909,13 +917,13 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu break; case GL_FOG_COLOR: - if (_mesa_get_clamp_fragment_color(ctx)) + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) COPY_4FV(v->value_float_4, ctx->Fog.Color); else COPY_4FV(v->value_float_4, 
ctx->Fog.ColorUnclamped); break; case GL_COLOR_CLEAR_VALUE: - if (_mesa_get_clamp_fragment_color(ctx)) { + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) { v->value_float_4[0] = CLAMP(ctx->Color.ClearColor.f[0], 0.0F, 1.0F); v->value_float_4[1] = CLAMP(ctx->Color.ClearColor.f[1], 0.0F, 1.0F); v->value_float_4[2] = CLAMP(ctx->Color.ClearColor.f[2], 0.0F, 1.0F); @@ -924,13 +932,13 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu COPY_4FV(v->value_float_4, ctx->Color.ClearColor.f); break; case GL_BLEND_COLOR_EXT: - if (_mesa_get_clamp_fragment_color(ctx)) + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) COPY_4FV(v->value_float_4, ctx->Color.BlendColor); else COPY_4FV(v->value_float_4, ctx->Color.BlendColorUnclamped); break; case GL_ALPHA_TEST_REF: - if (_mesa_get_clamp_fragment_color(ctx)) + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) v->value_float = ctx->Color.AlphaRef; else v->value_float = ctx->Color.AlphaRefUnclamped; @@ -1078,6 +1086,11 @@ check_extra(struct gl_context *ctx, const char *func, const struct value_desc *d if (_mesa_is_gles3(ctx)) api_found = GL_TRUE; break; + case EXTRA_API_ES31: + api_check = GL_TRUE; + if (_mesa_is_gles31(ctx)) + api_found = GL_TRUE; + break; case EXTRA_API_GL: api_check = GL_TRUE; if (_mesa_is_desktop_gl(ctx)) @@ -1911,6 +1924,7 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v) if (index >= ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs) goto invalid_value; v->value_int = ctx->Array.VAO->VertexBinding[VERT_ATTRIB_GENERIC(index)].Stride; + return TYPE_INT; /* ARB_shader_image_load_store */ case GL_IMAGE_BINDING_NAME: { diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 41cb2c17b60..74ff3ba6619 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -409,6 +409,12 @@ descriptor=[ [ "SAMPLER_BINDING", "LOC_CUSTOM, TYPE_INT, GL_SAMPLER_BINDING, NO_EXTRA" ], ]}, +# Enums in OpenGL Core profile and ES 3.1 +{ "apis": ["GL_CORE", "GLES3"], "params": [ +# GL_ARB_draw_indirect / GLES 3.1 + [ "DRAW_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_draw_indirect_es31" ], +]}, + # Remaining enums are only in OpenGL { "apis": ["GL", "GL_CORE"], "params": [ [ "ACCUM_RED_BITS", "BUFFER_INT(Visual.accumRedBits), NO_EXTRA" ], @@ -793,19 +799,20 @@ descriptor=[ [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader" ], [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader" ], -# GL_ARB_gpu_shader5 - [ "MAX_GEOMETRY_SHADER_INVOCATIONS", "CONST(MAX_GEOMETRY_SHADER_INVOCATIONS), extra_ARB_gpu_shader5" ], - [ "MIN_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MinFragmentInterpolationOffset), extra_ARB_gpu_shader5" ], - [ "MAX_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MaxFragmentInterpolationOffset), extra_ARB_gpu_shader5" ], - [ "FRAGMENT_INTERPOLATION_OFFSET_BITS", "CONST(FRAGMENT_INTERPOLATION_OFFSET_BITS), extra_ARB_gpu_shader5" ], +# GL_ARB_framebuffer_no_attachments + ["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"], + ["MAX_FRAMEBUFFER_HEIGHT", "CONTEXT_INT(Const.MaxFramebufferHeight), extra_ARB_framebuffer_no_attachments"], + ["MAX_FRAMEBUFFER_LAYERS", "CONTEXT_INT(Const.MaxFramebufferLayers), extra_ARB_framebuffer_no_attachments"], + ["MAX_FRAMEBUFFER_SAMPLES", "CONTEXT_INT(Const.MaxFramebufferSamples), 
extra_ARB_framebuffer_no_attachments"], + +# GL_EXT_polygon_offset_clamp + [ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ], ]}, # Enums restricted to OpenGL Core profile { "apis": ["GL_CORE"], "params": [ # GL_ARB_texture_buffer_range [ "TEXTURE_BUFFER_OFFSET_ALIGNMENT", "CONTEXT_INT(Const.TextureBufferOffsetAlignment), extra_ARB_texture_buffer_range" ], -# GL_ARB_draw_indirect - [ "DRAW_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_draw_indirect" ], # GL_ARB_viewport_array [ "MAX_VIEWPORTS", "CONTEXT_INT(Const.MaxViewports), extra_ARB_viewport_array" ], @@ -814,8 +821,11 @@ descriptor=[ [ "LAYER_PROVOKING_VERTEX", "CONTEXT_ENUM(Light.ProvokingVertex), extra_ARB_viewport_array" ], [ "VIEWPORT_INDEX_PROVOKING_VERTEX", "CONTEXT_ENUM(Light.ProvokingVertex), extra_ARB_viewport_array" ], -# GL_EXT_polygon_offset_clamp - [ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ], +# GL_ARB_gpu_shader5 + [ "MAX_GEOMETRY_SHADER_INVOCATIONS", "CONST(MAX_GEOMETRY_SHADER_INVOCATIONS), extra_ARB_gpu_shader5" ], + [ "MIN_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MinFragmentInterpolationOffset), extra_ARB_gpu_shader5" ], + [ "MAX_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MaxFragmentInterpolationOffset), extra_ARB_gpu_shader5" ], + [ "FRAGMENT_INTERPOLATION_OFFSET_BITS", "CONST(FRAGMENT_INTERPOLATION_OFFSET_BITS), extra_ARB_gpu_shader5" ], ]} ] diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c index 1b2c7f054f6..72d99ca4e22 100644 --- a/src/mesa/main/getstring.c +++ b/src/mesa/main/getstring.c @@ -72,10 +72,18 @@ shading_language_version(struct gl_context *ctx) break; case API_OPENGLES2: - return (ctx->Version < 30) - ? 
(const GLubyte *) "OpenGL ES GLSL ES 1.0.16" - : (const GLubyte *) "OpenGL ES GLSL ES 3.00"; - + switch (ctx->Version) { + case 20: + return (const GLubyte *) "OpenGL ES GLSL ES 1.0.16"; + case 30: + return (const GLubyte *) "OpenGL ES GLSL ES 3.00"; + case 31: + return (const GLubyte *) "OpenGL ES GLSL ES 3.10"; + default: + _mesa_problem(ctx, + "Invalid OpenGL ES version in shading_language_version()"); + return (const GLubyte *) 0; + } case API_OPENGLES: /* fall-through */ diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c index 8ced5794938..ac69fabccaa 100644 --- a/src/mesa/main/glformats.c +++ b/src/mesa/main/glformats.c @@ -1200,7 +1200,7 @@ _mesa_is_depth_or_stencil_format(GLenum format) * \return GL_TRUE if compressed, GL_FALSE if uncompressed */ GLboolean -_mesa_is_compressed_format(struct gl_context *ctx, GLenum format) +_mesa_is_compressed_format(const struct gl_context *ctx, GLenum format) { switch (format) { case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: @@ -1678,6 +1678,10 @@ _mesa_error_check_format_and_type(const struct gl_context *ctx, case GL_LUMINANCE: case GL_ALPHA: return GL_NO_ERROR; + case GL_RG: + case GL_RED: + if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_texture_rg) + return GL_NO_ERROR; default: return GL_INVALID_OPERATION; } @@ -2292,8 +2296,18 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx, break; case GL_HALF_FLOAT: - if (internalFormat != GL_RG16F) - return GL_INVALID_OPERATION; + case GL_HALF_FLOAT_OES: + switch (internalFormat) { + case GL_RG16F: + break; + case GL_RG: + if (ctx->Extensions.ARB_texture_rg && + ctx->Extensions.OES_texture_half_float) + break; + /* fallthrough */ + default: + return GL_INVALID_OPERATION; + } break; case GL_FLOAT: @@ -2301,6 +2315,11 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx, case GL_RG16F: case GL_RG32F: break; + case GL_RG: + if (ctx->Extensions.ARB_texture_rg && + ctx->Extensions.OES_texture_float) + break; + /* fallthrough */ default: return GL_INVALID_OPERATION; } @@ -2361,8 +2380,19 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx, break; case GL_HALF_FLOAT: - if (internalFormat != GL_R16F) + case GL_HALF_FLOAT_OES: + switch (internalFormat) { + case GL_R16F: + break; + case GL_RG: + case GL_RED: + if (ctx->Extensions.ARB_texture_rg && + ctx->Extensions.OES_texture_half_float) + break; + /* fallthrough */ + default: return GL_INVALID_OPERATION; + } break; case GL_FLOAT: @@ -2370,6 +2400,11 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx, case GL_R16F: case GL_R32F: break; + case GL_RED: + if (ctx->Extensions.ARB_texture_rg && + ctx->Extensions.OES_texture_float) + break; + /* fallthrough */ default: return GL_INVALID_OPERATION; } diff --git a/src/mesa/main/glformats.h b/src/mesa/main/glformats.h index e1ecd64d5f9..8881cb7d86b 100644 --- a/src/mesa/main/glformats.h +++ b/src/mesa/main/glformats.h @@ -96,7 +96,7 @@ extern GLboolean _mesa_is_depth_or_stencil_format(GLenum format); extern GLboolean -_mesa_is_compressed_format(struct gl_context *ctx, GLenum format); +_mesa_is_compressed_format(const struct gl_context *ctx, GLenum format); extern GLenum _mesa_base_format_to_integer_format(GLenum format); diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h index 7f7f9a39b3b..a2d98d4ddff 100644 --- a/src/mesa/main/glheader.h +++ b/src/mesa/main/glheader.h @@ -135,12 +135,6 @@ typedef void *GLeglImageOES; #define GL_SHADER_PROGRAM_MESA 0x9999 -/** - * Internal token for geometry programs. 
- * Use the value for GL_GEOMETRY_PROGRAM_NV for now. - */ -#define MESA_GEOMETRY_PROGRAM 0x8c26 - /* Several fields of struct gl_config can take these as values. Since * GLX header files may not be available everywhere they need to be used, * redefine them here. diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c index d04cccd94d2..315b5d64004 100644 --- a/src/mesa/main/hash.c +++ b/src/mesa/main/hash.c @@ -389,34 +389,6 @@ _mesa_HashDeleteAll(struct _mesa_HashTable *table, /** - * Clone all entries in a hash table, into a new table. - * - * \param table the hash table to clone - */ -struct _mesa_HashTable * -_mesa_HashClone(const struct _mesa_HashTable *table) -{ - /* cast-away const */ - struct _mesa_HashTable *table2 = (struct _mesa_HashTable *) table; - struct hash_entry *entry; - struct _mesa_HashTable *clonetable; - - assert(table); - mtx_lock(&table2->Mutex); - - clonetable = _mesa_NewHashTable(); - assert(clonetable); - hash_table_foreach(table->ht, entry) { - _mesa_HashInsert(clonetable, (GLint)(uintptr_t)entry->key, entry->data); - } - - mtx_unlock(&table2->Mutex); - - return clonetable; -} - - -/** * Walk over all entries in a hash table, calling callback function for each. * Note: we use a separate mutex in this function to avoid a recursive * locking deadlock (in case the callback calls _mesa_HashRemove()) and to diff --git a/src/mesa/main/hash.h b/src/mesa/main/hash.h index e3e8f492e8b..da3b9973d24 100644 --- a/src/mesa/main/hash.h +++ b/src/mesa/main/hash.h @@ -59,9 +59,6 @@ _mesa_HashDeleteAll(struct _mesa_HashTable *table, void (*callback)(GLuint key, void *data, void *userData), void *userData); -extern struct _mesa_HashTable * -_mesa_HashClone(const struct _mesa_HashTable *table); - extern void _mesa_HashWalk(const struct _mesa_HashTable *table, void (*callback)(GLuint key, void *data, void *userData), diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index c4d917ebba4..9ffe3decd0f 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -230,38 +230,6 @@ static inline int IFLOOR(float f) } -/** Return (as an integer) ceiling of float */ -static inline int ICEIL(float f) -{ -#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__) - /* - * IEEE ceil for computers that round to nearest or even. - * 'f' must be between -4194304 and 4194303. - * This ceil operation is done by "(iround(f + .5) + iround(f - .5) + 1) >> 1", - * but uses some IEEE specific tricks for better speed. - * Contributed by Josh Vanderhoof - */ - int ai, bi; - double af, bf; - af = (3 << 22) + 0.5 + (double)f; - bf = (3 << 22) + 0.5 - (double)f; - /* GCC generates an extra fstp/fld without this. */ - __asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st"); - __asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st"); - return (ai - bi + 1) >> 1; -#else - int ai, bi; - double af, bf; - fi_type u; - af = (3 << 22) + 0.5 + (double)f; - bf = (3 << 22) + 0.5 - (double)f; - u.f = (float) af; ai = u.i; - u.f = (float) bf; bi = u.i; - return (ai - bi + 1) >> 1; -#endif -} - - /** * Is x a power of two? 
*/ diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index bd84113ea91..481fd5e7fdf 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -43,7 +43,6 @@ #include "glapi/glapi.h" #include "math/m_matrix.h" /* GLmatrix */ #include "glsl/shader_enums.h" -#include "util/simple_list.h" /* struct simple_node */ #include "main/formats.h" /* MESA_FORMAT_COUNT */ @@ -398,7 +397,6 @@ struct gl_config { GLboolean rgbMode; GLboolean floatMode; - GLboolean colorIndexMode; /* XXX is this used anywhere? */ GLuint doubleBufferMode; GLuint stereoMode; @@ -2099,8 +2097,6 @@ struct gl_program GLbitfield64 DoubleInputsRead; /**< Bitmask of which input regs are read and are doubles */ GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */ GLbitfield SystemValuesRead; /**< Bitmask of SYSTEM_VALUE_x inputs used */ - GLbitfield InputFlags[MAX_PROGRAM_INPUTS]; /**< PROG_PARAM_BIT_x flags */ - GLbitfield OutputFlags[MAX_PROGRAM_OUTPUTS]; /**< PROG_PARAM_BIT_x flags */ GLbitfield TexturesUsed[MAX_COMBINED_TEXTURE_IMAGE_UNITS]; /**< TEXTURE_x_BIT bitmask */ GLbitfield SamplersUsed; /**< Bitfield of which samplers are used */ GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */ @@ -2275,16 +2271,10 @@ struct gl_vertex_program_state */ struct gl_geometry_program_state { - GLboolean Enabled; /**< GL_ARB_GEOMETRY_SHADER4 */ - GLboolean _Enabled; /**< Enabled and valid program? */ - struct gl_geometry_program *Current; /**< user-bound geometry program */ - /** Currently enabled and valid program (including internal programs * and compiled shader programs). */ struct gl_geometry_program *_Current; - - GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */ }; /** @@ -2320,8 +2310,6 @@ struct gl_fragment_program_state */ struct gl_compute_program_state { - struct gl_compute_program *Current; /**< user-bound compute program */ - /** Currently enabled and valid program (including internal programs * and compiled shader programs). */ @@ -2733,7 +2721,7 @@ struct gl_shader_program } Comp; /* post-link info: */ - unsigned NumUserUniformStorage; + unsigned NumUniformStorage; unsigned NumHiddenUniforms; struct gl_uniform_storage *UniformStorage; @@ -2832,6 +2820,8 @@ struct gl_pipeline_object mtx_t Mutex; + GLchar *Label; /**< GL_KHR_debug */ + /** * Programs used for rendering * @@ -3009,7 +2999,6 @@ struct gl_shared_state struct _mesa_HashTable *Programs; /**< All vertex/fragment programs */ struct gl_vertex_program *DefaultVertexProgram; struct gl_fragment_program *DefaultFragmentProgram; - struct gl_geometry_program *DefaultGeometryProgram; /*@}*/ /* GL_ATI_fragment_shader */ @@ -3151,12 +3140,29 @@ struct gl_framebuffer */ struct gl_config Visual; - GLuint Width, Height; /**< size of frame buffer in pixels */ + /** + * Size of frame buffer in pixels. If there are no attachments, then both + * of these are 0. + */ + GLuint Width, Height; - /** \name Drawing bounds (Intersection of buffer size and scissor box) */ + /** + * In the case that the framebuffer has no attachment (i.e. + * GL_ARB_framebuffer_no_attachments) then the geometry of + * the framebuffer is specified by the default values. 
+ */ + struct { + GLuint Width, Height, Layers, NumSamples; + GLboolean FixedSampleLocations; + } DefaultGeometry; + + /** \name Drawing bounds (Intersection of buffer size and scissor box) + * The drawing region is given by [_Xmin, _Xmax) x [_Ymin, _Ymax), + * (inclusive for _Xmin and _Ymin while exclusive for _Xmax and _Ymax) + */ /*@{*/ - GLint _Xmin, _Xmax; /**< inclusive */ - GLint _Ymin, _Ymax; /**< exclusive */ + GLint _Xmin, _Xmax; + GLint _Ymin, _Ymax; /*@}*/ /** \name Derived Z buffer stuff */ @@ -3169,6 +3175,22 @@ struct gl_framebuffer /** One of the GL_FRAMEBUFFER_(IN)COMPLETE_* tokens */ GLenum _Status; + /** Whether one of Attachment has Type != GL_NONE + * NOTE: the values for Width and Height are set to 0 in case of having + * no attachments, a backend driver supporting the extension + * GL_ARB_framebuffer_no_attachments must check for the flag _HasAttachments + * and if GL_FALSE, must then use the values in DefaultGeometry to initialize + * its viewport, scissor and so on (in particular _Xmin, _Xmax, _Ymin and + * _Ymax do NOT take into account _HasAttachments being false). To get the + * geometry of the framebuffer, the helper functions + * _mesa_geometric_width(), + * _mesa_geometric_height(), + * _mesa_geometric_samples() and + * _mesa_geometric_layers() + * are available that check _HasAttachments. + */ + bool _HasAttachments; + /** Integer color values */ GLboolean _IntegerColor; @@ -3179,7 +3201,9 @@ struct gl_framebuffer /** * The maximum number of layers in the framebuffer, or 0 if the framebuffer * is not layered. For cube maps and cube map arrays, each cube face - * counts as a layer. + * counts as a layer. As the case for Width, Height a backend driver + * supporting GL_ARB_framebuffer_no_attachments must use DefaultGeometry + * in the case that _HasAttachments is false */ GLuint MaxNumLayers; @@ -3358,6 +3382,14 @@ struct gl_constants GLuint MaxRenderbufferSize; /**< GL_EXT_framebuffer_object */ GLuint MaxSamples; /**< GL_ARB_framebuffer_object */ + /** + * GL_ARB_framebuffer_no_attachments + */ + GLuint MaxFramebufferWidth; + GLuint MaxFramebufferHeight; + GLuint MaxFramebufferLayers; + GLuint MaxFramebufferSamples; + /** Number of varying vectors between any two shader stages. 
*/ GLuint MaxVarying; @@ -3635,6 +3667,7 @@ struct gl_extensions GLboolean ARB_fragment_program; GLboolean ARB_fragment_program_shadow; GLboolean ARB_fragment_shader; + GLboolean ARB_framebuffer_no_attachments; GLboolean ARB_framebuffer_object; GLboolean ARB_explicit_attrib_location; GLboolean ARB_explicit_uniform_location; @@ -4422,7 +4455,12 @@ enum _debug DEBUG_INCOMPLETE_FBO = (1 << 3) }; - +static inline bool +_mesa_active_fragment_shader_has_atomic_ops(const struct gl_context *ctx) +{ + return ctx->Shader._CurrentFragmentProgram != NULL && + ctx->Shader._CurrentFragmentProgram->NumAtomicBuffers > 0; +} #ifdef __cplusplus } diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c index aecb5b1fa51..5626054687b 100644 --- a/src/mesa/main/objectlabel.c +++ b/src/mesa/main/objectlabel.c @@ -30,6 +30,7 @@ #include "enums.h" #include "fbobject.h" #include "objectlabel.h" +#include "pipelineobj.h" #include "queryobj.h" #include "samplerobj.h" #include "shaderobj.h" @@ -214,8 +215,13 @@ get_label_pointer(struct gl_context *ctx, GLenum identifier, GLuint name, } break; case GL_PROGRAM_PIPELINE: - /* requires GL 4.2 */ - goto invalid_enum; + { + struct gl_pipeline_object *pipe = + _mesa_lookup_pipeline_object(ctx, name); + if (pipe) + labelPtr = &pipe->Label; + } + break; default: goto invalid_enum; } diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c index 0fefa7d568b..279ae2078fe 100644 --- a/src/mesa/main/pipelineobj.c +++ b/src/mesa/main/pipelineobj.c @@ -65,6 +65,7 @@ _mesa_delete_pipeline_object(struct gl_context *ctx, _mesa_reference_shader_program(ctx, &obj->ActiveProgram, NULL); mtx_destroy(&obj->Mutex); + free(obj->Label); ralloc_free(obj); } @@ -136,8 +137,8 @@ _mesa_free_pipeline_data(struct gl_context *ctx) * a non-existent ID. The spec defines ID 0 as being technically * non-existent. 
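Together with the new Label field on gl_pipeline_object and the now-exported _mesa_lookup_pipeline_object(), the objectlabel.c hunk above lets program pipeline objects carry GL_KHR_debug labels like other GL objects. A client-side usage sketch, assuming a context that exposes GL_KHR_debug and separate shader objects, with function pointers resolved by whatever loader is in use:

#include <GL/glcorearb.h>   /* or your GL loader's header */

static void
label_pipeline_example(void)
{
   GLuint pipe;
   GLchar buf[64];
   GLsizei len = 0;

   glGenProgramPipelines(1, &pipe);
   glBindProgramPipeline(pipe);

   /* A negative length means the label is a NUL-terminated string. */
   glObjectLabel(GL_PROGRAM_PIPELINE, pipe, -1, "post-process pipeline");

   /* Reads the label back through the same lookup path. */
   glGetObjectLabel(GL_PROGRAM_PIPELINE, pipe, sizeof(buf), &len, buf);
}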
*/ -static inline struct gl_pipeline_object * -lookup_pipeline_object(struct gl_context *ctx, GLuint id) +struct gl_pipeline_object * +_mesa_lookup_pipeline_object(struct gl_context *ctx, GLuint id) { if (id == 0) return NULL; @@ -225,7 +226,7 @@ _mesa_UseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program) { GET_CURRENT_CONTEXT(ctx); - struct gl_pipeline_object *pipe = lookup_pipeline_object(ctx, pipeline); + struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); struct gl_shader_program *shProg = NULL; GLbitfield any_valid_stages; @@ -337,7 +338,7 @@ _mesa_ActiveShaderProgram(GLuint pipeline, GLuint program) { GET_CURRENT_CONTEXT(ctx); struct gl_shader_program *shProg = NULL; - struct gl_pipeline_object *pipe = lookup_pipeline_object(ctx, pipeline); + struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); if (program != 0) { shProg = _mesa_lookup_shader_program_err(ctx, program, @@ -399,7 +400,7 @@ _mesa_BindProgramPipeline(GLuint pipeline) */ if (pipeline) { /* non-default pipeline object */ - newObj = lookup_pipeline_object(ctx, pipeline); + newObj = _mesa_lookup_pipeline_object(ctx, pipeline); if (!newObj) { _mesa_error(ctx, GL_INVALID_OPERATION, "glBindProgramPipeline(non-gen name)"); @@ -468,7 +469,7 @@ _mesa_DeleteProgramPipelines(GLsizei n, const GLuint *pipelines) for (i = 0; i < n; i++) { struct gl_pipeline_object *obj = - lookup_pipeline_object(ctx, pipelines[i]); + _mesa_lookup_pipeline_object(ctx, pipelines[i]); if (obj) { assert(obj->Name == pipelines[i]); @@ -568,7 +569,7 @@ _mesa_IsProgramPipeline(GLuint pipeline) { GET_CURRENT_CONTEXT(ctx); - struct gl_pipeline_object *obj = lookup_pipeline_object(ctx, pipeline); + struct gl_pipeline_object *obj = _mesa_lookup_pipeline_object(ctx, pipeline); if (obj == NULL) return GL_FALSE; @@ -582,7 +583,7 @@ void GLAPIENTRY _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params) { GET_CURRENT_CONTEXT(ctx); - struct gl_pipeline_object *pipe = lookup_pipeline_object(ctx, pipeline); + struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); /* Are geometry shaders available in this context? */ @@ -673,6 +674,38 @@ program_stages_all_active(struct gl_pipeline_object *pipe, return status; } +static bool +program_stages_interleaved_illegally(const struct gl_pipeline_object *pipe) +{ + struct gl_shader_program *prev = NULL; + unsigned i, j; + + /* Look for programs bound to stages: A -> B -> A, with any intervening + * sequence of unrelated programs or empty stages. + */ + for (i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader_program *cur = pipe->CurrentProgram[i]; + + /* Empty stages anywhere in the pipe are OK */ + if (!cur || cur == prev) + continue; + + if (prev) { + /* We've seen an A -> B transition; look at the rest of the pipe + * to see if we ever see A again. + */ + for (j = i + 1; j < MESA_SHADER_STAGES; j++) { + if (pipe->CurrentProgram[j] == prev) + return true; + } + } + + prev = cur; + } + + return false; +} + extern GLboolean _mesa_validate_program_pipeline(struct gl_context* ctx, struct gl_pipeline_object *pipe, @@ -721,24 +754,13 @@ _mesa_validate_program_pipeline(struct gl_context* ctx, * - One program object is active for at least two shader stages * and a second program is active for a shader stage between two * stages for which the first program was active." - * - * Without Tesselation, the only case where this can occur is the geometry - * shader between the fragment shader and vertex shader. 
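The new program_stages_interleaved_illegally() above replaces the hard-coded vertex/geometry/fragment check (removed just below) with a single scan that rejects any A -> B -> A pattern across the stage array, ignoring empty stages. A self-contained sketch of the same scan over a plain array of per-stage program ids, where 0 means the stage is empty; the stage count is for the illustration only, not MESA_SHADER_STAGES:

#include <stdbool.h>
#include <stdio.h>

#define NUM_STAGES 6   /* stage slots for this sketch only */

static bool
stages_interleaved_illegally(const unsigned prog[NUM_STAGES])
{
   unsigned prev = 0;

   for (unsigned i = 0; i < NUM_STAGES; i++) {
      const unsigned cur = prog[i];

      if (cur == 0 || cur == prev)      /* empty stage, or same program */
         continue;

      if (prev != 0) {
         /* Saw prev -> cur; illegal if prev appears again further down. */
         for (unsigned j = i + 1; j < NUM_STAGES; j++)
            if (prog[j] == prev)
               return true;
      }
      prev = cur;
   }
   return false;
}

int
main(void)
{
   const unsigned legal[NUM_STAGES]   = { 1, 0, 0, 1, 2, 0 };  /* A .. A B */
   const unsigned illegal[NUM_STAGES] = { 1, 0, 0, 2, 1, 0 };  /* A .. B A */

   printf("%d %d\n", stages_interleaved_illegally(legal),
          stages_interleaved_illegally(illegal));
   return 0;
}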
*/ - if (pipe->CurrentProgram[MESA_SHADER_GEOMETRY] - && pipe->CurrentProgram[MESA_SHADER_FRAGMENT] - && pipe->CurrentProgram[MESA_SHADER_VERTEX]) { - if (pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name == pipe->CurrentProgram[MESA_SHADER_FRAGMENT]->Name && - pipe->CurrentProgram[MESA_SHADER_GEOMETRY]->Name != pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name) { - pipe->InfoLog = - ralloc_asprintf(pipe, - "Program %d is active for geometry stage between " - "two stages for which another program %d is " - "active", - pipe->CurrentProgram[MESA_SHADER_GEOMETRY]->Name, - pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name); - goto err; - } + if (program_stages_interleaved_illegally(pipe)) { + pipe->InfoLog = + ralloc_strdup(pipe, + "Program is active for multiple shader stages with an " + "intervening stage provided by another program"); + goto err; } /* Section 2.11.11 (Shader Execution), subheading "Validation," of the @@ -820,7 +842,7 @@ _mesa_ValidateProgramPipeline(GLuint pipeline) { GET_CURRENT_CONTEXT(ctx); - struct gl_pipeline_object *pipe = lookup_pipeline_object(ctx, pipeline); + struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); if (!pipe) { _mesa_error(ctx, GL_INVALID_OPERATION, @@ -838,7 +860,7 @@ _mesa_GetProgramPipelineInfoLog(GLuint pipeline, GLsizei bufSize, { GET_CURRENT_CONTEXT(ctx); - struct gl_pipeline_object *pipe = lookup_pipeline_object(ctx, pipeline); + struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); if (!pipe) { _mesa_error(ctx, GL_INVALID_VALUE, diff --git a/src/mesa/main/pipelineobj.h b/src/mesa/main/pipelineobj.h index b57bcb99e5c..6dee775ab5e 100644 --- a/src/mesa/main/pipelineobj.h +++ b/src/mesa/main/pipelineobj.h @@ -45,6 +45,9 @@ _mesa_init_pipeline(struct gl_context *ctx); extern void _mesa_free_pipeline_data(struct gl_context *ctx); +extern struct gl_pipeline_object * +_mesa_lookup_pipeline_object(struct gl_context *ctx, GLuint id); + extern void _mesa_reference_pipeline_object_(struct gl_context *ctx, struct gl_pipeline_object **ptr, diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c index b15a13210c0..d857b84e60d 100644 --- a/src/mesa/main/program_resource.c +++ b/src/mesa/main/program_resource.c @@ -220,12 +220,12 @@ _mesa_GetProgramResourceIndex(GLuint program, GLenum programInterface, case GL_PROGRAM_INPUT: case GL_PROGRAM_OUTPUT: case GL_UNIFORM: - case GL_UNIFORM_BLOCK: case GL_TRANSFORM_FEEDBACK_VARYING: - /* Validate name syntax for arrays. */ + /* Validate name syntax for array variables */ if (!valid_program_resource_index_name(name)) return GL_INVALID_INDEX; - + /* fall-through */ + case GL_UNIFORM_BLOCK: res = _mesa_program_resource_find_name(shProg, programInterface, name); if (!res) return GL_INVALID_INDEX; diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index ed0104c9e46..a3357cd6419 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -46,15 +46,18 @@ /** * Return true if the conversion L=R+G+B is needed. 
*/ -static GLboolean -need_rgb_to_luminance_conversion(mesa_format texFormat, GLenum format) +GLboolean +_mesa_need_rgb_to_luminance_conversion(mesa_format texFormat, GLenum format) { GLenum baseTexFormat = _mesa_get_format_base_format(texFormat); return (baseTexFormat == GL_RG || baseTexFormat == GL_RGB || baseTexFormat == GL_RGBA) && - (format == GL_LUMINANCE || format == GL_LUMINANCE_ALPHA); + (format == GL_LUMINANCE || + format == GL_LUMINANCE_ALPHA || + format == GL_LUMINANCE_INTEGER_EXT || + format == GL_LUMINANCE_ALPHA_INTEGER_EXT); } @@ -83,7 +86,7 @@ get_readpixels_transfer_ops(const struct gl_context *ctx, mesa_format texFormat, if (uses_blit) { /* For blit-based ReadPixels packing, the clamping is done automatically * unless the type is float. */ - if (_mesa_get_clamp_read_color(ctx) && + if (_mesa_get_clamp_read_color(ctx, ctx->ReadBuffer) && (type == GL_FLOAT || type == GL_HALF_FLOAT)) { transferOps |= IMAGE_CLAMP_BIT; } @@ -91,7 +94,7 @@ get_readpixels_transfer_ops(const struct gl_context *ctx, mesa_format texFormat, else { /* For CPU-based ReadPixels packing, the clamping must always be done * for non-float types, */ - if (_mesa_get_clamp_read_color(ctx) || + if (_mesa_get_clamp_read_color(ctx, ctx->ReadBuffer) || (type != GL_FLOAT && type != GL_HALF_FLOAT)) { transferOps |= IMAGE_CLAMP_BIT; } @@ -102,7 +105,7 @@ get_readpixels_transfer_ops(const struct gl_context *ctx, mesa_format texFormat, * have any effect anyway. */ if (_mesa_get_format_datatype(texFormat) == GL_UNSIGNED_NORMALIZED && - !need_rgb_to_luminance_conversion(texFormat, format)) { + !_mesa_need_rgb_to_luminance_conversion(texFormat, format)) { transferOps &= ~IMAGE_CLAMP_BIT; } @@ -146,7 +149,7 @@ _mesa_readpixels_needs_slow_path(const struct gl_context *ctx, GLenum format, default: /* Color formats. */ - if (need_rgb_to_luminance_conversion(rb->Format, format)) { + if (_mesa_need_rgb_to_luminance_conversion(rb->Format, format)) { return GL_TRUE; } @@ -418,7 +421,7 @@ read_rgba_pixels( struct gl_context *ctx, const struct gl_pixelstore_attrib *packing ) { GLbitfield transferOps; - bool dst_is_integer, dst_is_luminance, needs_rebase; + bool dst_is_integer, convert_rgb_to_lum, needs_rebase; int dst_stride, src_stride, rb_stride; uint32_t dst_format, src_format; GLubyte *dst, *map; @@ -439,10 +442,8 @@ read_rgba_pixels( struct gl_context *ctx, dst_is_integer = _mesa_is_enum_format_integer(format); dst_stride = _mesa_image_row_stride(packing, width, format, type); dst_format = _mesa_format_from_format_and_type(format, type); - dst_is_luminance = format == GL_LUMINANCE || - format == GL_LUMINANCE_ALPHA || - format == GL_LUMINANCE_INTEGER_EXT || - format == GL_LUMINANCE_ALPHA_INTEGER_EXT; + convert_rgb_to_lum = + _mesa_need_rgb_to_luminance_conversion(rb->Format, format); dst = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height, format, type, 0, 0); @@ -490,7 +491,7 @@ read_rgba_pixels( struct gl_context *ctx, */ assert(!transferOps || (transferOps && !dst_is_integer)); - needs_rgba = transferOps || dst_is_luminance; + needs_rgba = transferOps || convert_rgb_to_lum; rgba = NULL; if (needs_rgba) { uint32_t rgba_format; @@ -563,7 +564,7 @@ read_rgba_pixels( struct gl_context *ctx, * If the dst format is Luminance, we need to do the conversion by computing * L=R+G+B values. 
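Renaming dst_is_luminance to convert_rgb_to_lum ties both the fast-path and slow-path decisions to the same helper, _mesa_need_rgb_to_luminance_conversion(), which now also covers GL_LUMINANCE_INTEGER_EXT and GL_LUMINANCE_ALPHA_INTEGER_EXT. The conversion itself is a per-pixel sum; a simplified sketch for a float RGBA scratch buffer follows (the real packing code also handles integer types, LUMINANCE_ALPHA and clamping):

/* L = R + G + B over a float RGBA scratch buffer; illustration only. */
static void
rgba_to_luminance_floats(const float *rgba, float *lum,
                         unsigned width, unsigned height)
{
   for (unsigned i = 0; i < width * height; i++) {
      const float r = rgba[i * 4 + 0];
      const float g = rgba[i * 4 + 1];
      const float b = rgba[i * 4 + 2];

      lum[i] = r + g + b;
      /* For a LUMINANCE_ALPHA destination, A (rgba[i * 4 + 3]) is carried
       * through unchanged alongside L. */
   }
}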
*/ - if (!dst_is_luminance) { + if (!convert_rgb_to_lum) { _mesa_format_convert(dst, dst_format, dst_stride, src, src_format, src_stride, width, height, diff --git a/src/mesa/main/readpix.h b/src/mesa/main/readpix.h index 4bb35e17e4d..1636dd9ce3e 100644 --- a/src/mesa/main/readpix.h +++ b/src/mesa/main/readpix.h @@ -37,6 +37,9 @@ extern GLboolean _mesa_readpixels_needs_slow_path(const struct gl_context *ctx, GLenum format, GLenum type, GLboolean uses_blit); +extern GLboolean +_mesa_need_rgb_to_luminance_conversion(mesa_format texFormat, GLenum format); + extern void _mesa_readpixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei height, diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 6e46553724b..a6246a39aad 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -28,6 +28,7 @@ * \author Ian Romanick <[email protected]> */ +#include "main/context.h" #include "main/core.h" #include "glsl_symbol_table.h" #include "ir.h" @@ -478,12 +479,20 @@ _mesa_GetFragDataLocation(GLuint program, const GLchar *name) const char* _mesa_program_resource_name(struct gl_program_resource *res) { + const ir_variable *var; switch (res->Type) { case GL_UNIFORM_BLOCK: return RESOURCE_UBO(res)->Name; case GL_TRANSFORM_FEEDBACK_VARYING: return RESOURCE_XFB(res)->Name; case GL_PROGRAM_INPUT: + var = RESOURCE_VAR(res); + /* Special case gl_VertexIDMESA -> gl_VertexID. */ + if (var->data.mode == ir_var_system_value && + var->data.location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) { + return "gl_VertexID"; + } + /* fallthrough */ case GL_PROGRAM_OUTPUT: return RESOURCE_VAR(res)->name; case GL_UNIFORM: @@ -538,6 +547,17 @@ struct gl_program_resource * _mesa_program_resource_find_name(struct gl_shader_program *shProg, GLenum programInterface, const char *name) { + GET_CURRENT_CONTEXT(ctx); + const char *full_name = name; + + /* When context has 'VertexID_is_zero_based' set, gl_VertexID has been + * lowered to gl_VertexIDMESA. + */ + if (name && ctx->Const.VertexID_is_zero_based) { + if (strcmp(name, "gl_VertexID") == 0) + full_name = "gl_VertexIDMESA"; + } + struct gl_program_resource *res = shProg->ProgramResourceList; for (unsigned i = 0; i < shProg->NumProgramResourceList; i++, res++) { if (res->Type != programInterface) @@ -562,7 +582,7 @@ _mesa_program_resource_find_name(struct gl_shader_program *shProg, break; case GL_PROGRAM_INPUT: case GL_PROGRAM_OUTPUT: - if (array_index_of_resource(res, name) >= 0) + if (array_index_of_resource(res, full_name) >= 0) return res; break; default: @@ -727,6 +747,10 @@ program_resource_location(struct gl_shader_program *shProg, return -1; } + /* Built-in locations should report GL_INVALID_INDEX. */ + if (is_gl_identifier(name)) + return GL_INVALID_INDEX; + /* VERT_ATTRIB_GENERIC0 and FRAG_RESULT_DATA0 are decremented as these * offsets are used internally to differentiate between built-in attributes * and user-defined attributes. 
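When ctx->Const.VertexID_is_zero_based is set, the compiler lowers gl_VertexID to the gl_VertexIDMESA system value, so the program-interface code above translates names in both directions: find-by-name queries for "gl_VertexID" are matched against the lowered name, and the resource reports itself back as "gl_VertexID" (the real code keys off the variable's system-value location rather than comparing strings). Built-in "gl_" names additionally report GL_INVALID_INDEX from the location query. A small sketch of the two-way mapping, illustration only:

#include <stdbool.h>
#include <string.h>

/* Application-visible name -> internal (lowered) name, as used when
 * searching the resource list. */
static const char *
query_name_to_internal(const char *name, bool vertex_id_is_zero_based)
{
   if (vertex_id_is_zero_based && strcmp(name, "gl_VertexID") == 0)
      return "gl_VertexIDMESA";
   return name;
}

/* Internal (lowered) name -> name reported to the application. */
static const char *
internal_name_to_reported(const char *internal_name)
{
   if (strcmp(internal_name, "gl_VertexIDMESA") == 0)
      return "gl_VertexID";
   return internal_name;
}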
@@ -986,8 +1010,9 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, case GL_ACTIVE_VARIABLES: return get_buffer_property(shProg, res, prop, val, caller); case GL_REFERENCED_BY_COMPUTE_SHADER: - if (!ctx->Extensions.ARB_compute_shader) + if (!_mesa_has_compute_shaders(ctx)) goto invalid_enum; + /* fallthrough */ case GL_REFERENCED_BY_VERTEX_SHADER: case GL_REFERENCED_BY_GEOMETRY_SHADER: case GL_REFERENCED_BY_FRAGMENT_SHADER: diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index a04b28711f7..a4296adf799 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -532,7 +532,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, /* True if geometry shaders (of the form that was adopted into GLSL 1.50 * and GL 3.2) are available in this context */ - const bool has_core_gs = _mesa_is_desktop_gl(ctx) && ctx->Version >= 32; + const bool has_core_gs = _mesa_has_geometry_shaders(ctx); /* Are uniform buffer objects available in this context? */ @@ -569,13 +569,13 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, *params = _mesa_longest_attribute_name_length(shProg); return; case GL_ACTIVE_UNIFORMS: - *params = shProg->NumUserUniformStorage - shProg->NumHiddenUniforms; + *params = shProg->NumUniformStorage - shProg->NumHiddenUniforms; return; case GL_ACTIVE_UNIFORM_MAX_LENGTH: { unsigned i; GLint max_len = 0; const unsigned num_uniforms = - shProg->NumUserUniformStorage - shProg->NumHiddenUniforms; + shProg->NumUniformStorage - shProg->NumHiddenUniforms; for (i = 0; i < num_uniforms; i++) { /* Add one for the terminating NUL character for a non-array, and diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index e428960362d..110a18e1e2c 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -282,10 +282,10 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg) unsigned i; if (shProg->UniformStorage) { - for (i = 0; i < shProg->NumUserUniformStorage; ++i) + for (i = 0; i < shProg->NumUniformStorage; ++i) _mesa_uniform_detach_all_driver_storage(&shProg->UniformStorage[i]); ralloc_free(shProg->UniformStorage); - shProg->NumUserUniformStorage = 0; + shProg->NumUniformStorage = 0; shProg->UniformStorage = NULL; } diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c index 0b76cc01218..d5ac9f1fb13 100644 --- a/src/mesa/main/shared.c +++ b/src/mesa/main/shared.c @@ -313,7 +313,6 @@ free_shared_state(struct gl_context *ctx, struct gl_shared_state *shared) _mesa_DeleteHashTable(shared->Programs); _mesa_reference_vertprog(ctx, &shared->DefaultVertexProgram, NULL); - _mesa_reference_geomprog(ctx, &shared->DefaultGeometryProgram, NULL); _mesa_reference_fragprog(ctx, &shared->DefaultFragmentProgram, NULL); _mesa_HashDeleteAll(shared->ATIShaders, delete_fragshader_cb, ctx); diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 99db37bafd7..bede7fe1d0e 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -225,7 +225,7 @@ update_program(struct gl_context *ctx) if (ctx->GeometryProgram._Current != prevGP) { new_state |= _NEW_PROGRAM; if (ctx->Driver.BindProgram) { - ctx->Driver.BindProgram(ctx, MESA_GEOMETRY_PROGRAM, + ctx->Driver.BindProgram(ctx, GL_GEOMETRY_PROGRAM_NV, (struct gl_program *) ctx->GeometryProgram._Current); } } @@ -266,15 +266,9 @@ update_program_constants(struct gl_context *ctx) } } - if (ctx->GeometryProgram._Current) { - const struct gl_program_parameter_list *params = - ctx->GeometryProgram._Current->Base.Parameters; - /*FIXME: 
StateFlags is always 0 because we have unnamed constant - * not state changes */ - if (params /*&& params->StateFlags & ctx->NewState*/) { - new_state |= _NEW_PROGRAM_CONSTANTS; - } - } + /* Don't handle geometry shaders here. They don't use any state + * constants. + */ if (ctx->VertexProgram._Current) { const struct gl_program_parameter_list *params = @@ -389,10 +383,10 @@ _mesa_update_state_locked( struct gl_context *ctx ) update_frontbit( ctx ); if (new_state & _NEW_BUFFERS) - _mesa_update_framebuffer(ctx); + _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer); if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT)) - _mesa_update_draw_buffer_bounds( ctx ); + _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); if (new_state & _NEW_LIGHT) _mesa_update_lighting( ctx ); diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index ccd0124a2bb..800720b798e 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -68,10 +68,13 @@ struct function { int offset; }; +extern const struct function common_desktop_functions_possible[]; +extern const struct function gl_compatibility_functions_possible[]; extern const struct function gl_core_functions_possible[]; extern const struct function gles11_functions_possible[]; extern const struct function gles2_functions_possible[]; extern const struct function gles3_functions_possible[]; +extern const struct function gles31_functions_possible[]; class DispatchSanity_test : public ::testing::Test { public: @@ -96,7 +99,7 @@ DispatchSanity_test::SetUp() _mesa_init_driver_functions(&driver_functions); const unsigned size = _glapi_get_dispatch_table_size(); - nop_table = (_glapi_proc *) _glapi_new_nop_table(size); + nop_table = (_glapi_proc *) _mesa_new_nop_table(size); } void @@ -175,10 +178,19 @@ validate_nops(struct gl_context *ctx, const _glapi_proc *nop_table) TEST_F(DispatchSanity_test, GL31_CORE) { SetUpCtx(API_OPENGL_CORE, 31); + validate_functions(&ctx, common_desktop_functions_possible, nop_table); validate_functions(&ctx, gl_core_functions_possible, nop_table); validate_nops(&ctx, nop_table); } +TEST_F(DispatchSanity_test, GL30) +{ + SetUpCtx(API_OPENGL_COMPAT, 30); + validate_functions(&ctx, common_desktop_functions_possible, nop_table); + validate_functions(&ctx, gl_compatibility_functions_possible, nop_table); + validate_nops(&ctx, nop_table); +} + TEST_F(DispatchSanity_test, GLES11) { SetUpCtx(API_OPENGLES, 11); @@ -201,7 +213,16 @@ TEST_F(DispatchSanity_test, GLES3) validate_nops(&ctx, nop_table); } -const struct function gl_core_functions_possible[] = { +TEST_F(DispatchSanity_test, GLES31) +{ + SetUpCtx(API_OPENGLES2, 31); + validate_functions(&ctx, gles2_functions_possible, nop_table); + validate_functions(&ctx, gles3_functions_possible, nop_table); + validate_functions(&ctx, gles31_functions_possible, nop_table); + validate_nops(&ctx, nop_table); +} + +const struct function common_desktop_functions_possible[] = { { "glCullFace", 10, -1 }, { "glFrontFace", 10, -1 }, { "glHint", 10, -1 }, @@ -213,8 +234,8 @@ const struct function gl_core_functions_possible[] = { { "glTexParameterfv", 10, -1 }, { "glTexParameteri", 10, -1 }, { "glTexParameteriv", 10, -1 }, - { "glTexImage1D", 10, -1 }, - { "glTexImage2D", 10, -1 }, + { "glTexImage1D", 10, _gloffset_TexImage1D }, + { "glTexImage2D", 10, _gloffset_TexImage2D }, { "glDrawBuffer", 10, -1 }, { "glClear", 10, -1 }, { "glClearColor", 10, -1 }, @@ -482,7 +503,6 @@ const struct function 
gl_core_functions_possible[] = { /* GL 3.1 */ { "glDrawArraysInstanced", 31, -1 }, { "glDrawElementsInstanced", 31, -1 }, - { "glTexBuffer", 31, -1 }, { "glPrimitiveRestartIndex", 31, -1 }, /* GL_ARB_shader_objects */ @@ -535,12 +555,8 @@ const struct function gl_core_functions_possible[] = { { "glGetInteger64i_v", 32, -1 }, { "glGetBufferParameteri64v", 32, -1 }, { "glFramebufferTexture", 32, -1 }, - - /* GL_ARB_geometry_shader4 */ - { "glProgramParameteriARB", 32, -1 }, - { "glFramebufferTextureARB", 32, -1 }, - { "glFramebufferTextureLayerARB", 32, -1 }, - { "glFramebufferTextureFaceARB", 32, -1 }, + { "glProgramParameteri", 32, -1 }, + { "glFramebufferTextureLayer", 32, -1 }, /* GL 3.3 */ { "glVertexAttribDivisor", 33, -1 }, @@ -673,34 +689,6 @@ const struct function gl_core_functions_possible[] = { { "glVertexAttribP4uiv", 43, -1 }, { "glDrawArraysIndirect", 43, -1 }, { "glDrawElementsIndirect", 43, -1 }, - { "glUniform1d", 40, -1 }, - { "glUniform2d", 40, -1 }, - { "glUniform3d", 40, -1 }, - { "glUniform4d", 40, -1 }, - { "glUniform1dv", 40, -1 }, - { "glUniform2dv", 40, -1 }, - { "glUniform3dv", 40, -1 }, - { "glUniform4dv", 40, -1 }, - { "glUniformMatrix2dv", 40, -1 }, - { "glUniformMatrix3dv", 40, -1 }, - { "glUniformMatrix4dv", 40, -1 }, - { "glUniformMatrix2x3dv", 40, -1 }, - { "glUniformMatrix2x4dv", 40, -1 }, - { "glUniformMatrix3x2dv", 40, -1 }, - { "glUniformMatrix3x4dv", 40, -1 }, - { "glUniformMatrix4x2dv", 40, -1 }, - { "glUniformMatrix4x3dv", 40, -1 }, - { "glGetUniformdv", 43, -1 }, -// { "glGetSubroutineUniformLocation", 43, -1 }, // XXX: Add to xml -// { "glGetSubroutineIndex", 43, -1 }, // XXX: Add to xml -// { "glGetActiveSubroutineUniformiv", 43, -1 }, // XXX: Add to xml -// { "glGetActiveSubroutineUniformName", 43, -1 }, // XXX: Add to xml -// { "glGetActiveSubroutineName", 43, -1 }, // XXX: Add to xml -// { "glUniformSubroutinesuiv", 43, -1 }, // XXX: Add to xml -// { "glGetUniformSubroutineuiv", 43, -1 }, // XXX: Add to xml -// { "glGetProgramStageiv", 43, -1 }, // XXX: Add to xml -// { "glPatchParameteri", 43, -1 }, // XXX: Add to xml -// { "glPatchParameterfv", 43, -1 }, // XXX: Add to xml { "glBindTransformFeedback", 43, -1 }, { "glDeleteTransformFeedbacks", 43, -1 }, { "glGenTransformFeedbacks", 43, -1 }, @@ -728,12 +716,12 @@ const struct function gl_core_functions_possible[] = { { "glGenProgramPipelines", 43, -1 }, { "glIsProgramPipeline", 43, -1 }, { "glGetProgramPipelineiv", 43, -1 }, + { "glProgramUniform1d", 43, -1 }, + { "glProgramUniform1dv", 43, -1 }, { "glProgramUniform1i", 43, -1 }, { "glProgramUniform1iv", 43, -1 }, { "glProgramUniform1f", 43, -1 }, { "glProgramUniform1fv", 43, -1 }, - { "glProgramUniform1d", 40, -1 }, - { "glProgramUniform1dv", 40, -1 }, { "glProgramUniform1ui", 43, -1 }, { "glProgramUniform1uiv", 43, -1 }, { "glProgramUniform2i", 43, -1 }, @@ -754,50 +742,32 @@ const struct function gl_core_functions_possible[] = { { "glProgramUniform3uiv", 43, -1 }, { "glProgramUniform4i", 43, -1 }, { "glProgramUniform4iv", 43, -1 }, + { "glProgramUniform4d", 43, -1 }, + { "glProgramUniform4dv", 43, -1 }, { "glProgramUniform4f", 43, -1 }, { "glProgramUniform4fv", 43, -1 }, - { "glProgramUniform4d", 40, -1 }, - { "glProgramUniform4dv", 40, -1 }, { "glProgramUniform4ui", 43, -1 }, { "glProgramUniform4uiv", 43, -1 }, + { "glProgramUniformMatrix2dv", 43, -1 }, { "glProgramUniformMatrix2fv", 43, -1 }, + { "glProgramUniformMatrix3dv", 43, -1 }, { "glProgramUniformMatrix3fv", 43, -1 }, + { "glProgramUniformMatrix4dv", 43, -1 }, { 
"glProgramUniformMatrix4fv", 43, -1 }, - { "glProgramUniformMatrix2dv", 40, -1 }, - { "glProgramUniformMatrix3dv", 40, -1 }, - { "glProgramUniformMatrix4dv", 40, -1 }, + { "glProgramUniformMatrix2x3dv", 43, -1 }, { "glProgramUniformMatrix2x3fv", 43, -1 }, + { "glProgramUniformMatrix3x2dv", 43, -1 }, { "glProgramUniformMatrix3x2fv", 43, -1 }, + { "glProgramUniformMatrix2x4dv", 43, -1 }, { "glProgramUniformMatrix2x4fv", 43, -1 }, + { "glProgramUniformMatrix4x2dv", 43, -1 }, { "glProgramUniformMatrix4x2fv", 43, -1 }, + { "glProgramUniformMatrix3x4dv", 43, -1 }, { "glProgramUniformMatrix3x4fv", 43, -1 }, + { "glProgramUniformMatrix4x3dv", 43, -1 }, { "glProgramUniformMatrix4x3fv", 43, -1 }, - { "glProgramUniformMatrix2x3dv", 40, -1 }, - { "glProgramUniformMatrix3x2dv", 40, -1 }, - { "glProgramUniformMatrix2x4dv", 40, -1 }, - { "glProgramUniformMatrix4x2dv", 40, -1 }, - { "glProgramUniformMatrix3x4dv", 40, -1 }, - { "glProgramUniformMatrix4x3dv", 40, -1 }, { "glValidateProgramPipeline", 43, -1 }, { "glGetProgramPipelineInfoLog", 43, -1 }, - { "glVertexAttribL1d", 41, -1 }, - { "glVertexAttribL2d", 41, -1 }, - { "glVertexAttribL3d", 41, -1 }, - { "glVertexAttribL4d", 41, -1 }, - { "glVertexAttribL1dv", 41, -1 }, - { "glVertexAttribL2dv", 41, -1 }, - { "glVertexAttribL3dv", 41, -1 }, - { "glVertexAttribL4dv", 41, -1 }, - { "glVertexAttribLPointer", 41, -1 }, - { "glGetVertexAttribLdv", 41, -1 }, - { "glViewportArrayv", 43, -1 }, - { "glViewportIndexedf", 43, -1 }, - { "glViewportIndexedfv", 43, -1 }, - { "glScissorArrayv", 43, -1 }, - { "glScissorIndexed", 43, -1 }, - { "glScissorIndexedv", 43, -1 }, - { "glDepthRangeArrayv", 43, -1 }, - { "glDepthRangeIndexed", 43, -1 }, { "glGetFloati_v", 43, -1 }, { "glGetDoublei_v", 43, -1 }, // { "glCreateSyncFromCLeventARB", 43, -1 }, // XXX: Add to xml @@ -840,8 +810,6 @@ const struct function gl_core_functions_possible[] = { { "glClearBufferSubData", 43, -1 }, // { "glClearNamedBufferDataEXT", 43, -1 }, // XXX: Add to xml // { "glClearNamedBufferSubDataEXT", 43, -1 }, // XXX: Add to xml - { "glDispatchCompute", 43, -1 }, - { "glDispatchComputeIndirect", 43, -1 }, { "glCopyImageSubData", 43, -1 }, { "glTextureView", 43, -1 }, { "glBindVertexBuffer", 43, -1 }, @@ -853,11 +821,10 @@ const struct function gl_core_functions_possible[] = { // { "glVertexArrayBindVertexBufferEXT", 43, -1 }, // XXX: Add to xml // { "glVertexArrayVertexAttribFormatEXT", 43, -1 }, // XXX: Add to xml // { "glVertexArrayVertexAttribIFormatEXT", 43, -1 }, // XXX: Add to xml -// { "glVertexArrayVertexAttribLFormatEXT", 43, -1 }, // XXX: Add to xml // { "glVertexArrayVertexAttribBindingEXT", 43, -1 }, // XXX: Add to xml // { "glVertexArrayVertexBindingDivisorEXT", 43, -1 }, // XXX: Add to xml -// { "glFramebufferParameteri", 43, -1 }, // XXX: Add to xml -// { "glGetFramebufferParameteriv", 43, -1 }, // XXX: Add to xml + { "glFramebufferParameteri", 43, -1 }, + { "glGetFramebufferParameteriv", 43, -1 }, // { "glNamedFramebufferParameteriEXT", 43, -1 }, // XXX: Add to xml // { "glGetNamedFramebufferParameterivEXT", 43, -1 }, // XXX: Add to xml // { "glGetInternalformati64v", 43, -1 }, // XXX: Add to xml @@ -876,7 +843,6 @@ const struct function gl_core_functions_possible[] = { { "glGetProgramResourceLocation", 43, -1 }, { "glGetProgramResourceLocationIndex", 43, -1 }, // { "glShaderStorageBlockBinding", 43, -1 }, // XXX: Add to xml - { "glTexBufferRange", 43, -1 }, // { "glTextureBufferRangeEXT", 43, -1 }, // XXX: Add to xml { "glTexStorage2DMultisample", 43, -1 }, { 
"glTexStorage3DMultisample", 43, -1 }, @@ -958,6 +924,814 @@ const struct function gl_core_functions_possible[] = { /* GL_ARB_clip_control */ { "glClipControl", 45, -1 }, + /* GL_ARB_compute_shader */ + { "glDispatchCompute", 43, -1 }, + { "glDispatchComputeIndirect", 43, -1 }, + + /* GL_EXT_polygon_offset_clamp */ + { "glPolygonOffsetClampEXT", 11, -1 }, + { NULL, 0, -1 } +}; + +const struct function gl_compatibility_functions_possible[] = { + { "glBindVertexArrayAPPLE", 10, -1 }, + { "glGenVertexArraysAPPLE", 10, -1 }, + { "glBindRenderbufferEXT", 10, -1 }, + { "glBindFramebufferEXT", 10, -1 }, + { "glNewList", 10, _gloffset_NewList }, + { "glEndList", 10, _gloffset_EndList }, + { "glCallList", 10, _gloffset_CallList }, + { "glCallLists", 10, _gloffset_CallLists }, + { "glDeleteLists", 10, _gloffset_DeleteLists }, + { "glGenLists", 10, _gloffset_GenLists }, + { "glListBase", 10, _gloffset_ListBase }, + { "glBegin", 10, _gloffset_Begin }, + { "glBitmap", 10, _gloffset_Bitmap }, + { "glColor3b", 10, _gloffset_Color3b }, + { "glColor3bv", 10, _gloffset_Color3bv }, + { "glColor3d", 10, _gloffset_Color3d }, + { "glColor3dv", 10, _gloffset_Color3dv }, + { "glColor3f", 10, _gloffset_Color3f }, + { "glColor3fv", 10, _gloffset_Color3fv }, + { "glColor3i", 10, _gloffset_Color3i }, + { "glColor3iv", 10, _gloffset_Color3iv }, + { "glColor3s", 10, _gloffset_Color3s }, + { "glColor3sv", 10, _gloffset_Color3sv }, + { "glColor3ub", 10, _gloffset_Color3ub }, + { "glColor3ubv", 10, _gloffset_Color3ubv }, + { "glColor3ui", 10, _gloffset_Color3ui }, + { "glColor3uiv", 10, _gloffset_Color3uiv }, + { "glColor3us", 10, _gloffset_Color3us }, + { "glColor3usv", 10, _gloffset_Color3usv }, + { "glColor4b", 10, _gloffset_Color4b }, + { "glColor4bv", 10, _gloffset_Color4bv }, + { "glColor4d", 10, _gloffset_Color4d }, + { "glColor4dv", 10, _gloffset_Color4dv }, + { "glColor4f", 10, _gloffset_Color4f }, + { "glColor4fv", 10, _gloffset_Color4fv }, + { "glColor4i", 10, _gloffset_Color4i }, + { "glColor4iv", 10, _gloffset_Color4iv }, + { "glColor4s", 10, _gloffset_Color4s }, + { "glColor4sv", 10, _gloffset_Color4sv }, + { "glColor4ub", 10, _gloffset_Color4ub }, + { "glColor4ubv", 10, _gloffset_Color4ubv }, + { "glColor4ui", 10, _gloffset_Color4ui }, + { "glColor4uiv", 10, _gloffset_Color4uiv }, + { "glColor4us", 10, _gloffset_Color4us }, + { "glColor4usv", 10, _gloffset_Color4usv }, + { "glEdgeFlag", 10, _gloffset_EdgeFlag }, + { "glEdgeFlagv", 10, _gloffset_EdgeFlagv }, + { "glEnd", 10, _gloffset_End }, + { "glIndexd", 10, _gloffset_Indexd }, + { "glIndexdv", 10, _gloffset_Indexdv }, + { "glIndexf", 10, _gloffset_Indexf }, + { "glIndexfv", 10, _gloffset_Indexfv }, + { "glIndexi", 10, _gloffset_Indexi }, + { "glIndexiv", 10, _gloffset_Indexiv }, + { "glIndexs", 10, _gloffset_Indexs }, + { "glIndexsv", 10, _gloffset_Indexsv }, + { "glNormal3b", 10, _gloffset_Normal3b }, + { "glNormal3bv", 10, _gloffset_Normal3bv }, + { "glNormal3d", 10, _gloffset_Normal3d }, + { "glNormal3dv", 10, _gloffset_Normal3dv }, + { "glNormal3f", 10, _gloffset_Normal3f }, + { "glNormal3fv", 10, _gloffset_Normal3fv }, + { "glNormal3i", 10, _gloffset_Normal3i }, + { "glNormal3iv", 10, _gloffset_Normal3iv }, + { "glNormal3s", 10, _gloffset_Normal3s }, + { "glNormal3sv", 10, _gloffset_Normal3sv }, + { "glRasterPos2d", 10, _gloffset_RasterPos2d }, + { "glRasterPos2dv", 10, _gloffset_RasterPos2dv }, + { "glRasterPos2f", 10, _gloffset_RasterPos2f }, + { "glRasterPos2fv", 10, _gloffset_RasterPos2fv }, + { "glRasterPos2i", 10, _gloffset_RasterPos2i }, + { 
"glRasterPos2iv", 10, _gloffset_RasterPos2iv }, + { "glRasterPos2s", 10, _gloffset_RasterPos2s }, + { "glRasterPos2sv", 10, _gloffset_RasterPos2sv }, + { "glRasterPos3d", 10, _gloffset_RasterPos3d }, + { "glRasterPos3dv", 10, _gloffset_RasterPos3dv }, + { "glRasterPos3f", 10, _gloffset_RasterPos3f }, + { "glRasterPos3fv", 10, _gloffset_RasterPos3fv }, + { "glRasterPos3i", 10, _gloffset_RasterPos3i }, + { "glRasterPos3iv", 10, _gloffset_RasterPos3iv }, + { "glRasterPos3s", 10, _gloffset_RasterPos3s }, + { "glRasterPos3sv", 10, _gloffset_RasterPos3sv }, + { "glRasterPos4d", 10, _gloffset_RasterPos4d }, + { "glRasterPos4dv", 10, _gloffset_RasterPos4dv }, + { "glRasterPos4f", 10, _gloffset_RasterPos4f }, + { "glRasterPos4fv", 10, _gloffset_RasterPos4fv }, + { "glRasterPos4i", 10, _gloffset_RasterPos4i }, + { "glRasterPos4iv", 10, _gloffset_RasterPos4iv }, + { "glRasterPos4s", 10, _gloffset_RasterPos4s }, + { "glRasterPos4sv", 10, _gloffset_RasterPos4sv }, + { "glRectd", 10, _gloffset_Rectd }, + { "glRectdv", 10, _gloffset_Rectdv }, + { "glRectf", 10, _gloffset_Rectf }, + { "glRectfv", 10, _gloffset_Rectfv }, + { "glRecti", 10, _gloffset_Recti }, + { "glRectiv", 10, _gloffset_Rectiv }, + { "glRects", 10, _gloffset_Rects }, + { "glRectsv", 10, _gloffset_Rectsv }, + { "glTexCoord1d", 10, _gloffset_TexCoord1d }, + { "glTexCoord1dv", 10, _gloffset_TexCoord1dv }, + { "glTexCoord1f", 10, _gloffset_TexCoord1f }, + { "glTexCoord1fv", 10, _gloffset_TexCoord1fv }, + { "glTexCoord1i", 10, _gloffset_TexCoord1i }, + { "glTexCoord1iv", 10, _gloffset_TexCoord1iv }, + { "glTexCoord1s", 10, _gloffset_TexCoord1s }, + { "glTexCoord1sv", 10, _gloffset_TexCoord1sv }, + { "glTexCoord2d", 10, _gloffset_TexCoord2d }, + { "glTexCoord2dv", 10, _gloffset_TexCoord2dv }, + { "glTexCoord2f", 10, _gloffset_TexCoord2f }, + { "glTexCoord2fv", 10, _gloffset_TexCoord2fv }, + { "glTexCoord2i", 10, _gloffset_TexCoord2i }, + { "glTexCoord2iv", 10, _gloffset_TexCoord2iv }, + { "glTexCoord2s", 10, _gloffset_TexCoord2s }, + { "glTexCoord2sv", 10, _gloffset_TexCoord2sv }, + { "glTexCoord3d", 10, _gloffset_TexCoord3d }, + { "glTexCoord3dv", 10, _gloffset_TexCoord3dv }, + { "glTexCoord3f", 10, _gloffset_TexCoord3f }, + { "glTexCoord3fv", 10, _gloffset_TexCoord3fv }, + { "glTexCoord3i", 10, _gloffset_TexCoord3i }, + { "glTexCoord3iv", 10, _gloffset_TexCoord3iv }, + { "glTexCoord3s", 10, _gloffset_TexCoord3s }, + { "glTexCoord3sv", 10, _gloffset_TexCoord3sv }, + { "glTexCoord4d", 10, _gloffset_TexCoord4d }, + { "glTexCoord4dv", 10, _gloffset_TexCoord4dv }, + { "glTexCoord4f", 10, _gloffset_TexCoord4f }, + { "glTexCoord4fv", 10, _gloffset_TexCoord4fv }, + { "glTexCoord4i", 10, _gloffset_TexCoord4i }, + { "glTexCoord4iv", 10, _gloffset_TexCoord4iv }, + { "glTexCoord4s", 10, _gloffset_TexCoord4s }, + { "glTexCoord4sv", 10, _gloffset_TexCoord4sv }, + { "glVertex2d", 10, _gloffset_Vertex2d }, + { "glVertex2dv", 10, _gloffset_Vertex2dv }, + { "glVertex2f", 10, _gloffset_Vertex2f }, + { "glVertex2fv", 10, _gloffset_Vertex2fv }, + { "glVertex2i", 10, _gloffset_Vertex2i }, + { "glVertex2iv", 10, _gloffset_Vertex2iv }, + { "glVertex2s", 10, _gloffset_Vertex2s }, + { "glVertex2sv", 10, _gloffset_Vertex2sv }, + { "glVertex3d", 10, _gloffset_Vertex3d }, + { "glVertex3dv", 10, _gloffset_Vertex3dv }, + { "glVertex3f", 10, _gloffset_Vertex3f }, + { "glVertex3fv", 10, _gloffset_Vertex3fv }, + { "glVertex3i", 10, _gloffset_Vertex3i }, + { "glVertex3iv", 10, _gloffset_Vertex3iv }, + { "glVertex3s", 10, _gloffset_Vertex3s }, + { "glVertex3sv", 10, 
_gloffset_Vertex3sv }, + { "glVertex4d", 10, _gloffset_Vertex4d }, + { "glVertex4dv", 10, _gloffset_Vertex4dv }, + { "glVertex4f", 10, _gloffset_Vertex4f }, + { "glVertex4fv", 10, _gloffset_Vertex4fv }, + { "glVertex4i", 10, _gloffset_Vertex4i }, + { "glVertex4iv", 10, _gloffset_Vertex4iv }, + { "glVertex4s", 10, _gloffset_Vertex4s }, + { "glVertex4sv", 10, _gloffset_Vertex4sv }, + { "glClipPlane", 10, _gloffset_ClipPlane }, + { "glColorMaterial", 10, _gloffset_ColorMaterial }, + { "glFogf", 10, _gloffset_Fogf }, + { "glFogfv", 10, _gloffset_Fogfv }, + { "glFogi", 10, _gloffset_Fogi }, + { "glFogiv", 10, _gloffset_Fogiv }, + { "glLightf", 10, _gloffset_Lightf }, + { "glLightfv", 10, _gloffset_Lightfv }, + { "glLighti", 10, _gloffset_Lighti }, + { "glLightiv", 10, _gloffset_Lightiv }, + { "glLightModelf", 10, _gloffset_LightModelf }, + { "glLightModelfv", 10, _gloffset_LightModelfv }, + { "glLightModeli", 10, _gloffset_LightModeli }, + { "glLightModeliv", 10, _gloffset_LightModeliv }, + { "glLineStipple", 10, _gloffset_LineStipple }, + { "glMaterialf", 10, _gloffset_Materialf }, + { "glMaterialfv", 10, _gloffset_Materialfv }, + { "glMateriali", 10, _gloffset_Materiali }, + { "glMaterialiv", 10, _gloffset_Materialiv }, + { "glPolygonStipple", 10, _gloffset_PolygonStipple }, + { "glShadeModel", 10, _gloffset_ShadeModel }, + { "glTexEnvf", 10, _gloffset_TexEnvf }, + { "glTexEnvfv", 10, _gloffset_TexEnvfv }, + { "glTexEnvi", 10, _gloffset_TexEnvi }, + { "glTexEnviv", 10, _gloffset_TexEnviv }, + { "glTexGend", 10, _gloffset_TexGend }, + { "glTexGendv", 10, _gloffset_TexGendv }, + { "glTexGenf", 10, _gloffset_TexGenf }, + { "glTexGenfv", 10, _gloffset_TexGenfv }, + { "glTexGeni", 10, _gloffset_TexGeni }, + { "glTexGeniv", 10, _gloffset_TexGeniv }, + { "glFeedbackBuffer", 10, _gloffset_FeedbackBuffer }, + { "glSelectBuffer", 10, _gloffset_SelectBuffer }, + { "glRenderMode", 10, _gloffset_RenderMode }, + { "glInitNames", 10, _gloffset_InitNames }, + { "glLoadName", 10, _gloffset_LoadName }, + { "glPassThrough", 10, _gloffset_PassThrough }, + { "glPopName", 10, _gloffset_PopName }, + { "glPushName", 10, _gloffset_PushName }, + { "glClearAccum", 10, _gloffset_ClearAccum }, + { "glClearIndex", 10, _gloffset_ClearIndex }, + { "glIndexMask", 10, _gloffset_IndexMask }, + { "glAccum", 10, _gloffset_Accum }, + { "glPopAttrib", 10, _gloffset_PopAttrib }, + { "glPushAttrib", 10, _gloffset_PushAttrib }, + { "glMap1d", 10, _gloffset_Map1d }, + { "glMap1f", 10, _gloffset_Map1f }, + { "glMap2d", 10, _gloffset_Map2d }, + { "glMap2f", 10, _gloffset_Map2f }, + { "glMapGrid1d", 10, _gloffset_MapGrid1d }, + { "glMapGrid1f", 10, _gloffset_MapGrid1f }, + { "glMapGrid2d", 10, _gloffset_MapGrid2d }, + { "glMapGrid2f", 10, _gloffset_MapGrid2f }, + { "glEvalCoord1d", 10, _gloffset_EvalCoord1d }, + { "glEvalCoord1dv", 10, _gloffset_EvalCoord1dv }, + { "glEvalCoord1f", 10, _gloffset_EvalCoord1f }, + { "glEvalCoord1fv", 10, _gloffset_EvalCoord1fv }, + { "glEvalCoord2d", 10, _gloffset_EvalCoord2d }, + { "glEvalCoord2dv", 10, _gloffset_EvalCoord2dv }, + { "glEvalCoord2f", 10, _gloffset_EvalCoord2f }, + { "glEvalCoord2fv", 10, _gloffset_EvalCoord2fv }, + { "glEvalMesh1", 10, _gloffset_EvalMesh1 }, + { "glEvalPoint1", 10, _gloffset_EvalPoint1 }, + { "glEvalMesh2", 10, _gloffset_EvalMesh2 }, + { "glEvalPoint2", 10, _gloffset_EvalPoint2 }, + { "glAlphaFunc", 10, _gloffset_AlphaFunc }, + { "glPixelZoom", 10, _gloffset_PixelZoom }, + { "glPixelTransferf", 10, _gloffset_PixelTransferf }, + { "glPixelTransferi", 10, 
_gloffset_PixelTransferi }, + { "glPixelMapfv", 10, _gloffset_PixelMapfv }, + { "glPixelMapuiv", 10, _gloffset_PixelMapuiv }, + { "glPixelMapusv", 10, _gloffset_PixelMapusv }, + { "glCopyPixels", 10, _gloffset_CopyPixels }, + { "glDrawPixels", 10, _gloffset_DrawPixels }, + { "glGetClipPlane", 10, _gloffset_GetClipPlane }, + { "glGetLightfv", 10, _gloffset_GetLightfv }, + { "glGetLightiv", 10, _gloffset_GetLightiv }, + { "glGetMapdv", 10, _gloffset_GetMapdv }, + { "glGetMapfv", 10, _gloffset_GetMapfv }, + { "glGetMapiv", 10, _gloffset_GetMapiv }, + { "glGetMaterialfv", 10, _gloffset_GetMaterialfv }, + { "glGetMaterialiv", 10, _gloffset_GetMaterialiv }, + { "glGetPixelMapfv", 10, _gloffset_GetPixelMapfv }, + { "glGetPixelMapuiv", 10, _gloffset_GetPixelMapuiv }, + { "glGetPixelMapusv", 10, _gloffset_GetPixelMapusv }, + { "glGetPolygonStipple", 10, _gloffset_GetPolygonStipple }, + { "glGetTexEnvfv", 10, _gloffset_GetTexEnvfv }, + { "glGetTexEnviv", 10, _gloffset_GetTexEnviv }, + { "glGetTexGendv", 10, _gloffset_GetTexGendv }, + { "glGetTexGenfv", 10, _gloffset_GetTexGenfv }, + { "glGetTexGeniv", 10, _gloffset_GetTexGeniv }, + { "glIsList", 10, _gloffset_IsList }, + { "glFrustum", 10, _gloffset_Frustum }, + { "glLoadIdentity", 10, _gloffset_LoadIdentity }, + { "glLoadMatrixf", 10, _gloffset_LoadMatrixf }, + { "glLoadMatrixd", 10, _gloffset_LoadMatrixd }, + { "glMatrixMode", 10, _gloffset_MatrixMode }, + { "glMultMatrixf", 10, _gloffset_MultMatrixf }, + { "glMultMatrixd", 10, _gloffset_MultMatrixd }, + { "glOrtho", 10, _gloffset_Ortho }, + { "glPopMatrix", 10, _gloffset_PopMatrix }, + { "glPushMatrix", 10, _gloffset_PushMatrix }, + { "glRotated", 10, _gloffset_Rotated }, + { "glRotatef", 10, _gloffset_Rotatef }, + { "glScaled", 10, _gloffset_Scaled }, + { "glScalef", 10, _gloffset_Scalef }, + { "glTranslated", 10, _gloffset_Translated }, + { "glTranslatef", 10, _gloffset_Translatef }, + { "glArrayElement", 10, _gloffset_ArrayElement }, + { "glColorPointer", 10, _gloffset_ColorPointer }, + { "glDisableClientState", 10, _gloffset_DisableClientState }, + { "glEdgeFlagPointer", 10, _gloffset_EdgeFlagPointer }, + { "glEnableClientState", 10, _gloffset_EnableClientState }, + { "glIndexPointer", 10, _gloffset_IndexPointer }, + { "glInterleavedArrays", 10, _gloffset_InterleavedArrays }, + { "glNormalPointer", 10, _gloffset_NormalPointer }, + { "glTexCoordPointer", 10, _gloffset_TexCoordPointer }, + { "glVertexPointer", 10, _gloffset_VertexPointer }, + { "glAreTexturesResident", 10, _gloffset_AreTexturesResident }, + { "glPrioritizeTextures", 10, _gloffset_PrioritizeTextures }, + { "glIndexub", 10, _gloffset_Indexub }, + { "glIndexubv", 10, _gloffset_Indexubv }, + { "glPopClientAttrib", 10, _gloffset_PopClientAttrib }, + { "glPushClientAttrib", 10, _gloffset_PushClientAttrib }, + { "glColorTable", 10, _gloffset_ColorTable }, + { "glColorTableParameterfv", 10, _gloffset_ColorTableParameterfv }, + { "glColorTableParameteriv", 10, _gloffset_ColorTableParameteriv }, + { "glCopyColorTable", 10, _gloffset_CopyColorTable }, + { "glGetColorTable", 10, _gloffset_GetColorTable }, + { "glGetColorTableParameterfv", 10, _gloffset_GetColorTableParameterfv }, + { "glGetColorTableParameteriv", 10, _gloffset_GetColorTableParameteriv }, + { "glColorSubTable", 10, _gloffset_ColorSubTable }, + { "glCopyColorSubTable", 10, _gloffset_CopyColorSubTable }, + { "glConvolutionFilter1D", 10, _gloffset_ConvolutionFilter1D }, + { "glConvolutionFilter2D", 10, _gloffset_ConvolutionFilter2D }, + { "glConvolutionParameterf", 10, 
_gloffset_ConvolutionParameterf }, + { "glConvolutionParameterfv", 10, _gloffset_ConvolutionParameterfv }, + { "glConvolutionParameteri", 10, _gloffset_ConvolutionParameteri }, + { "glConvolutionParameteriv", 10, _gloffset_ConvolutionParameteriv }, + { "glCopyConvolutionFilter1D", 10, _gloffset_CopyConvolutionFilter1D }, + { "glCopyConvolutionFilter2D", 10, _gloffset_CopyConvolutionFilter2D }, + { "glGetConvolutionFilter", 10, _gloffset_GetConvolutionFilter }, + { "glGetConvolutionParameterfv", 10, _gloffset_GetConvolutionParameterfv }, + { "glGetConvolutionParameteriv", 10, _gloffset_GetConvolutionParameteriv }, + { "glGetSeparableFilter", 10, _gloffset_GetSeparableFilter }, + { "glSeparableFilter2D", 10, _gloffset_SeparableFilter2D }, + { "glGetHistogram", 10, _gloffset_GetHistogram }, + { "glGetHistogramParameterfv", 10, _gloffset_GetHistogramParameterfv }, + { "glGetHistogramParameteriv", 10, _gloffset_GetHistogramParameteriv }, + { "glGetMinmax", 10, _gloffset_GetMinmax }, + { "glGetMinmaxParameterfv", 10, _gloffset_GetMinmaxParameterfv }, + { "glGetMinmaxParameteriv", 10, _gloffset_GetMinmaxParameteriv }, + { "glHistogram", 10, _gloffset_Histogram }, + { "glMinmax", 10, _gloffset_Minmax }, + { "glResetHistogram", 10, _gloffset_ResetHistogram }, + { "glResetMinmax", 10, _gloffset_ResetMinmax }, + { "glClientActiveTexture", 10, _gloffset_ClientActiveTexture }, + { "glMultiTexCoord1d", 10, _gloffset_MultiTexCoord1d }, + { "glMultiTexCoord1dv", 10, _gloffset_MultiTexCoord1dv }, + { "glMultiTexCoord1f", 10, _gloffset_MultiTexCoord1fARB }, + { "glMultiTexCoord1fv", 10, _gloffset_MultiTexCoord1fvARB }, + { "glMultiTexCoord1i", 10, _gloffset_MultiTexCoord1i }, + { "glMultiTexCoord1iv", 10, _gloffset_MultiTexCoord1iv }, + { "glMultiTexCoord1s", 10, _gloffset_MultiTexCoord1s }, + { "glMultiTexCoord1sv", 10, _gloffset_MultiTexCoord1sv }, + { "glMultiTexCoord2d", 10, _gloffset_MultiTexCoord2d }, + { "glMultiTexCoord2dv", 10, _gloffset_MultiTexCoord2dv }, + { "glMultiTexCoord2f", 10, _gloffset_MultiTexCoord2fARB }, + { "glMultiTexCoord2fv", 10, _gloffset_MultiTexCoord2fvARB }, + { "glMultiTexCoord2i", 10, _gloffset_MultiTexCoord2i }, + { "glMultiTexCoord2iv", 10, _gloffset_MultiTexCoord2iv }, + { "glMultiTexCoord2s", 10, _gloffset_MultiTexCoord2s }, + { "glMultiTexCoord2sv", 10, _gloffset_MultiTexCoord2sv }, + { "glMultiTexCoord3d", 10, _gloffset_MultiTexCoord3d }, + { "glMultiTexCoord3dv", 10, _gloffset_MultiTexCoord3dv }, + { "glMultiTexCoord3f", 10, _gloffset_MultiTexCoord3fARB }, + { "glMultiTexCoord3fv", 10, _gloffset_MultiTexCoord3fvARB }, + { "glMultiTexCoord3i", 10, _gloffset_MultiTexCoord3i }, + { "glMultiTexCoord3iv", 10, _gloffset_MultiTexCoord3iv }, + { "glMultiTexCoord3s", 10, _gloffset_MultiTexCoord3s }, + { "glMultiTexCoord3sv", 10, _gloffset_MultiTexCoord3sv }, + { "glMultiTexCoord4d", 10, _gloffset_MultiTexCoord4d }, + { "glMultiTexCoord4dv", 10, _gloffset_MultiTexCoord4dv }, + { "glMultiTexCoord4f", 10, _gloffset_MultiTexCoord4fARB }, + { "glMultiTexCoord4fv", 10, _gloffset_MultiTexCoord4fvARB }, + { "glMultiTexCoord4i", 10, _gloffset_MultiTexCoord4i }, + { "glMultiTexCoord4iv", 10, _gloffset_MultiTexCoord4iv }, + { "glMultiTexCoord4s", 10, _gloffset_MultiTexCoord4s }, + { "glMultiTexCoord4sv", 10, _gloffset_MultiTexCoord4sv }, + { "glLoadTransposeMatrixf", 10, -1 }, + { "glLoadTransposeMatrixd", 10, -1 }, + { "glMultTransposeMatrixf", 10, -1 }, + { "glMultTransposeMatrixd", 10, -1 }, + { "glFogCoordf", 10, -1 }, + { "glFogCoordfv", 10, -1 }, + { "glFogCoordd", 10, -1 }, + { 
"glFogCoorddv", 10, -1 }, + { "glFogCoordPointer", 10, -1 }, + { "glSecondaryColor3b", 10, -1 }, + { "glSecondaryColor3bv", 10, -1 }, + { "glSecondaryColor3d", 10, -1 }, + { "glSecondaryColor3dv", 10, -1 }, + { "glSecondaryColor3f", 10, -1 }, + { "glSecondaryColor3fv", 10, -1 }, + { "glSecondaryColor3i", 10, -1 }, + { "glSecondaryColor3iv", 10, -1 }, + { "glSecondaryColor3s", 10, -1 }, + { "glSecondaryColor3sv", 10, -1 }, + { "glSecondaryColor3ub", 10, -1 }, + { "glSecondaryColor3ubv", 10, -1 }, + { "glSecondaryColor3ui", 10, -1 }, + { "glSecondaryColor3uiv", 10, -1 }, + { "glSecondaryColor3us", 10, -1 }, + { "glSecondaryColor3usv", 10, -1 }, + { "glSecondaryColorPointer", 10, -1 }, + { "glWindowPos2d", 10, -1 }, + { "glWindowPos2dv", 10, -1 }, + { "glWindowPos2f", 10, -1 }, + { "glWindowPos2fv", 10, -1 }, + { "glWindowPos2i", 10, -1 }, + { "glWindowPos2iv", 10, -1 }, + { "glWindowPos2s", 10, -1 }, + { "glWindowPos2sv", 10, -1 }, + { "glWindowPos3d", 10, -1 }, + { "glWindowPos3dv", 10, -1 }, + { "glWindowPos3f", 10, -1 }, + { "glWindowPos3fv", 10, -1 }, + { "glWindowPos3i", 10, -1 }, + { "glWindowPos3iv", 10, -1 }, + { "glWindowPos3s", 10, -1 }, + { "glWindowPos3sv", 10, -1 }, + { "glProgramStringARB", 10, -1 }, + { "glProgramEnvParameter4dARB", 10, -1 }, + { "glProgramEnvParameter4dvARB", 10, -1 }, + { "glProgramEnvParameter4fARB", 10, -1 }, + { "glProgramEnvParameter4fvARB", 10, -1 }, + { "glProgramLocalParameter4dARB", 10, -1 }, + { "glProgramLocalParameter4dvARB", 10, -1 }, + { "glProgramLocalParameter4fARB", 10, -1 }, + { "glProgramLocalParameter4fvARB", 10, -1 }, + { "glGetProgramEnvParameterdvARB", 10, -1 }, + { "glGetProgramEnvParameterfvARB", 10, -1 }, + { "glGetProgramLocalParameterdvARB", 10, -1 }, + { "glGetProgramLocalParameterfvARB", 10, -1 }, + { "glGetProgramivARB", 10, -1 }, + { "glGetProgramStringARB", 10, -1 }, + { "glPolygonOffsetEXT", 10, -1 }, + { "glColorPointerEXT", 10, -1 }, + { "glEdgeFlagPointerEXT", 10, -1 }, + { "glIndexPointerEXT", 10, -1 }, + { "glNormalPointerEXT", 10, -1 }, + { "glTexCoordPointerEXT", 10, -1 }, + { "glVertexPointerEXT", 10, -1 }, + { "glLockArraysEXT", 10, -1 }, + { "glUnlockArraysEXT", 10, -1 }, + { "glWindowPos4dMESA", 10, -1 }, + { "glWindowPos4dvMESA", 10, -1 }, + { "glWindowPos4fMESA", 10, -1 }, + { "glWindowPos4fvMESA", 10, -1 }, + { "glWindowPos4iMESA", 10, -1 }, + { "glWindowPos4ivMESA", 10, -1 }, + { "glWindowPos4sMESA", 10, -1 }, + { "glWindowPos4svMESA", 10, -1 }, + { "glBindProgramNV", 10, -1 }, + { "glDeleteProgramsNV", 10, -1 }, + { "glGenProgramsNV", 10, -1 }, + { "glIsProgramNV", 10, -1 }, + { "glVertexAttrib1sNV", 10, -1 }, + { "glVertexAttrib1svNV", 10, -1 }, + { "glVertexAttrib2sNV", 10, -1 }, + { "glVertexAttrib2svNV", 10, -1 }, + { "glVertexAttrib3sNV", 10, -1 }, + { "glVertexAttrib3svNV", 10, -1 }, + { "glVertexAttrib4sNV", 10, -1 }, + { "glVertexAttrib4svNV", 10, -1 }, + { "glVertexAttrib1fNV", 10, -1 }, + { "glVertexAttrib1fvNV", 10, -1 }, + { "glVertexAttrib2fNV", 10, -1 }, + { "glVertexAttrib2fvNV", 10, -1 }, + { "glVertexAttrib3fNV", 10, -1 }, + { "glVertexAttrib3fvNV", 10, -1 }, + { "glVertexAttrib4fNV", 10, -1 }, + { "glVertexAttrib4fvNV", 10, -1 }, + { "glVertexAttrib1dNV", 10, -1 }, + { "glVertexAttrib1dvNV", 10, -1 }, + { "glVertexAttrib2dNV", 10, -1 }, + { "glVertexAttrib2dvNV", 10, -1 }, + { "glVertexAttrib3dNV", 10, -1 }, + { "glVertexAttrib3dvNV", 10, -1 }, + { "glVertexAttrib4dNV", 10, -1 }, + { "glVertexAttrib4dvNV", 10, -1 }, + { "glVertexAttrib4ubNV", 10, -1 }, + { "glVertexAttrib4ubvNV", 10, -1 }, + 
{ "glVertexAttribs1svNV", 10, -1 }, + { "glVertexAttribs2svNV", 10, -1 }, + { "glVertexAttribs3svNV", 10, -1 }, + { "glVertexAttribs4svNV", 10, -1 }, + { "glVertexAttribs1fvNV", 10, -1 }, + { "glVertexAttribs2fvNV", 10, -1 }, + { "glVertexAttribs3fvNV", 10, -1 }, + { "glVertexAttribs4fvNV", 10, -1 }, + { "glVertexAttribs1dvNV", 10, -1 }, + { "glVertexAttribs2dvNV", 10, -1 }, + { "glVertexAttribs3dvNV", 10, -1 }, + { "glVertexAttribs4dvNV", 10, -1 }, + { "glVertexAttribs4ubvNV", 10, -1 }, + { "glGenFragmentShadersATI", 10, -1 }, + { "glBindFragmentShaderATI", 10, -1 }, + { "glDeleteFragmentShaderATI", 10, -1 }, + { "glBeginFragmentShaderATI", 10, -1 }, + { "glEndFragmentShaderATI", 10, -1 }, + { "glPassTexCoordATI", 10, -1 }, + { "glSampleMapATI", 10, -1 }, + { "glColorFragmentOp1ATI", 10, -1 }, + { "glColorFragmentOp2ATI", 10, -1 }, + { "glColorFragmentOp3ATI", 10, -1 }, + { "glAlphaFragmentOp1ATI", 10, -1 }, + { "glAlphaFragmentOp2ATI", 10, -1 }, + { "glAlphaFragmentOp3ATI", 10, -1 }, + { "glSetFragmentShaderConstantATI", 10, -1 }, + { "glActiveStencilFaceEXT", 10, -1 }, + { "glStencilFuncSeparateATI", 10, -1 }, + { "glProgramEnvParameters4fvEXT", 10, -1 }, + { "glProgramLocalParameters4fvEXT", 10, -1 }, + { "glPrimitiveRestartNV", 10, -1 }, + + { NULL, 0, -1 } +}; + +const struct function gl_core_functions_possible[] = { + /* GL 3.1 */ + { "glTexBuffer", 31, -1 }, + + /* GL 3.2 */ + { "glFramebufferTexture", 32, -1 }, + + /* GL 4.3 */ + { "glIsRenderbuffer", 43, -1 }, + { "glBindRenderbuffer", 43, -1 }, + { "glDeleteRenderbuffers", 43, -1 }, + { "glGenRenderbuffers", 43, -1 }, + { "glRenderbufferStorage", 43, -1 }, + { "glGetRenderbufferParameteriv", 43, -1 }, + { "glIsFramebuffer", 43, -1 }, + { "glBindFramebuffer", 43, -1 }, + { "glDeleteFramebuffers", 43, -1 }, + { "glGenFramebuffers", 43, -1 }, + { "glCheckFramebufferStatus", 43, -1 }, + { "glFramebufferTexture1D", 43, -1 }, + { "glFramebufferTexture2D", 43, -1 }, + { "glFramebufferTexture3D", 43, -1 }, + { "glFramebufferRenderbuffer", 43, -1 }, + { "glGetFramebufferAttachmentParameteriv", 43, -1 }, + { "glGenerateMipmap", 43, -1 }, + { "glBlitFramebuffer", 43, -1 }, + { "glRenderbufferStorageMultisample", 43, -1 }, + { "glFramebufferTextureLayer", 43, -1 }, + { "glMapBufferRange", 43, -1 }, + { "glFlushMappedBufferRange", 43, -1 }, + { "glBindVertexArray", 43, -1 }, + { "glDeleteVertexArrays", 43, -1 }, + { "glGenVertexArrays", 43, -1 }, + { "glIsVertexArray", 43, -1 }, + { "glGetUniformIndices", 43, -1 }, + { "glGetActiveUniformsiv", 43, -1 }, + { "glGetActiveUniformName", 43, -1 }, + { "glGetUniformBlockIndex", 43, -1 }, + { "glGetActiveUniformBlockiv", 43, -1 }, + { "glGetActiveUniformBlockName", 43, -1 }, + { "glUniformBlockBinding", 43, -1 }, + { "glCopyBufferSubData", 43, -1 }, + { "glDrawElementsBaseVertex", 43, -1 }, + { "glDrawRangeElementsBaseVertex", 43, -1 }, + { "glDrawElementsInstancedBaseVertex", 43, -1 }, + { "glMultiDrawElementsBaseVertex", 43, -1 }, + { "glProvokingVertex", 43, -1 }, + { "glFenceSync", 43, -1 }, + { "glIsSync", 43, -1 }, + { "glDeleteSync", 43, -1 }, + { "glClientWaitSync", 43, -1 }, + { "glWaitSync", 43, -1 }, + { "glGetInteger64v", 43, -1 }, + { "glGetSynciv", 43, -1 }, + { "glTexImage2DMultisample", 43, -1 }, + { "glTexImage3DMultisample", 43, -1 }, + { "glGetMultisamplefv", 43, -1 }, + { "glSampleMaski", 43, -1 }, + { "glBlendEquationiARB", 43, -1 }, + { "glBlendEquationSeparateiARB", 43, -1 }, + { "glBlendFunciARB", 43, -1 }, + { "glBlendFuncSeparateiARB", 43, -1 }, + { 
"glMinSampleShadingARB", 43, -1 }, // XXX: Add to xml +// { "glNamedStringARB", 43, -1 }, // XXX: Add to xml +// { "glDeleteNamedStringARB", 43, -1 }, // XXX: Add to xml +// { "glCompileShaderIncludeARB", 43, -1 }, // XXX: Add to xml +// { "glIsNamedStringARB", 43, -1 }, // XXX: Add to xml +// { "glGetNamedStringARB", 43, -1 }, // XXX: Add to xml +// { "glGetNamedStringivARB", 43, -1 }, // XXX: Add to xml + { "glBindFragDataLocationIndexed", 43, -1 }, + { "glGetFragDataIndex", 43, -1 }, + { "glGenSamplers", 43, -1 }, + { "glDeleteSamplers", 43, -1 }, + { "glIsSampler", 43, -1 }, + { "glBindSampler", 43, -1 }, + { "glSamplerParameteri", 43, -1 }, + { "glSamplerParameteriv", 43, -1 }, + { "glSamplerParameterf", 43, -1 }, + { "glSamplerParameterfv", 43, -1 }, + { "glSamplerParameterIiv", 43, -1 }, + { "glSamplerParameterIuiv", 43, -1 }, + { "glGetSamplerParameteriv", 43, -1 }, + { "glGetSamplerParameterIiv", 43, -1 }, + { "glGetSamplerParameterfv", 43, -1 }, + { "glGetSamplerParameterIuiv", 43, -1 }, + { "glQueryCounter", 43, -1 }, + { "glGetQueryObjecti64v", 43, -1 }, + { "glGetQueryObjectui64v", 43, -1 }, + { "glVertexP2ui", 43, -1 }, + { "glVertexP2uiv", 43, -1 }, + { "glVertexP3ui", 43, -1 }, + { "glVertexP3uiv", 43, -1 }, + { "glVertexP4ui", 43, -1 }, + { "glVertexP4uiv", 43, -1 }, + { "glTexCoordP1ui", 43, -1 }, + { "glTexCoordP1uiv", 43, -1 }, + { "glTexCoordP2ui", 43, -1 }, + { "glTexCoordP2uiv", 43, -1 }, + { "glTexCoordP3ui", 43, -1 }, + { "glTexCoordP3uiv", 43, -1 }, + { "glTexCoordP4ui", 43, -1 }, + { "glTexCoordP4uiv", 43, -1 }, + { "glMultiTexCoordP1ui", 43, -1 }, + { "glMultiTexCoordP1uiv", 43, -1 }, + { "glMultiTexCoordP2ui", 43, -1 }, + { "glMultiTexCoordP2uiv", 43, -1 }, + { "glMultiTexCoordP3ui", 43, -1 }, + { "glMultiTexCoordP3uiv", 43, -1 }, + { "glMultiTexCoordP4ui", 43, -1 }, + { "glMultiTexCoordP4uiv", 43, -1 }, + { "glNormalP3ui", 43, -1 }, + { "glNormalP3uiv", 43, -1 }, + { "glColorP3ui", 43, -1 }, + { "glColorP3uiv", 43, -1 }, + { "glColorP4ui", 43, -1 }, + { "glColorP4uiv", 43, -1 }, + { "glVertexAttribP1ui", 43, -1 }, + { "glVertexAttribP1uiv", 43, -1 }, + { "glVertexAttribP2ui", 43, -1 }, + { "glVertexAttribP2uiv", 43, -1 }, + { "glVertexAttribP3ui", 43, -1 }, + { "glVertexAttribP3uiv", 43, -1 }, + { "glVertexAttribP4ui", 43, -1 }, + { "glVertexAttribP4uiv", 43, -1 }, + { "glDrawArraysIndirect", 43, -1 }, + { "glDrawElementsIndirect", 43, -1 }, + + { "glUniform1d", 40, -1 }, + { "glUniform2d", 40, -1 }, + { "glUniform3d", 40, -1 }, + { "glUniform4d", 40, -1 }, + { "glUniform1dv", 40, -1 }, + { "glUniform2dv", 40, -1 }, + { "glUniform3dv", 40, -1 }, + { "glUniform4dv", 40, -1 }, + { "glUniformMatrix2dv", 40, -1 }, + { "glUniformMatrix3dv", 40, -1 }, + { "glUniformMatrix4dv", 40, -1 }, + { "glUniformMatrix2x3dv", 40, -1 }, + { "glUniformMatrix2x4dv", 40, -1 }, + { "glUniformMatrix3x2dv", 40, -1 }, + { "glUniformMatrix3x4dv", 40, -1 }, + { "glUniformMatrix4x2dv", 40, -1 }, + { "glUniformMatrix4x3dv", 40, -1 }, + { "glGetUniformdv", 43, -1 }, +// { "glGetSubroutineUniformLocation", 43, -1 }, // XXX: Add to xml +// { "glGetSubroutineIndex", 43, -1 }, // XXX: Add to xml +// { "glGetActiveSubroutineUniformiv", 43, -1 }, // XXX: Add to xml +// { "glGetActiveSubroutineUniformName", 43, -1 }, // XXX: Add to xml +// { "glGetActiveSubroutineName", 43, -1 }, // XXX: Add to xml +// { "glUniformSubroutinesuiv", 43, -1 }, // XXX: Add to xml +// { "glGetUniformSubroutineuiv", 43, -1 }, // XXX: Add to xml +// { "glGetProgramStageiv", 43, -1 }, // XXX: Add to xml +// { 
"glPatchParameteri", 43, -1 }, // XXX: Add to xml +// { "glPatchParameterfv", 43, -1 }, // XXX: Add to xml + + { "glBindTransformFeedback", 43, -1 }, + { "glDeleteTransformFeedbacks", 43, -1 }, + { "glGenTransformFeedbacks", 43, -1 }, + { "glIsTransformFeedback", 43, -1 }, + { "glPauseTransformFeedback", 43, -1 }, + { "glResumeTransformFeedback", 43, -1 }, + { "glDrawTransformFeedback", 43, -1 }, + { "glDrawTransformFeedbackStream", 43, -1 }, + { "glBeginQueryIndexed", 43, -1 }, + { "glEndQueryIndexed", 43, -1 }, + { "glGetQueryIndexediv", 43, -1 }, + { "glReleaseShaderCompiler", 43, -1 }, + { "glShaderBinary", 43, -1 }, + { "glGetShaderPrecisionFormat", 43, -1 }, + { "glDepthRangef", 43, -1 }, + { "glClearDepthf", 43, -1 }, + { "glGetProgramBinary", 43, -1 }, + { "glProgramBinary", 43, -1 }, + { "glProgramParameteri", 43, -1 }, + { "glUseProgramStages", 43, -1 }, + { "glActiveShaderProgram", 43, -1 }, + { "glCreateShaderProgramv", 43, -1 }, + { "glBindProgramPipeline", 43, -1 }, + { "glDeleteProgramPipelines", 43, -1 }, + { "glGenProgramPipelines", 43, -1 }, + { "glIsProgramPipeline", 43, -1 }, + { "glGetProgramPipelineiv", 43, -1 }, + { "glProgramUniform1i", 43, -1 }, + { "glProgramUniform1iv", 43, -1 }, + { "glProgramUniform1f", 43, -1 }, + { "glProgramUniform1fv", 43, -1 }, + { "glProgramUniform1d", 40, -1 }, + { "glProgramUniform1dv", 40, -1 }, + { "glProgramUniform1ui", 43, -1 }, + { "glProgramUniform1uiv", 43, -1 }, + { "glProgramUniform2i", 43, -1 }, + { "glProgramUniform2iv", 43, -1 }, + { "glProgramUniform2f", 43, -1 }, + { "glProgramUniform2fv", 43, -1 }, + { "glProgramUniform2d", 40, -1 }, + { "glProgramUniform2dv", 40, -1 }, + { "glProgramUniform2ui", 43, -1 }, + { "glProgramUniform2uiv", 43, -1 }, + { "glProgramUniform3i", 43, -1 }, + { "glProgramUniform3iv", 43, -1 }, + { "glProgramUniform3f", 43, -1 }, + { "glProgramUniform3fv", 43, -1 }, + { "glProgramUniform3d", 40, -1 }, + { "glProgramUniform3dv", 40, -1 }, + { "glProgramUniform3ui", 43, -1 }, + { "glProgramUniform3uiv", 43, -1 }, + { "glProgramUniform4i", 43, -1 }, + { "glProgramUniform4iv", 43, -1 }, + { "glProgramUniform4f", 43, -1 }, + { "glProgramUniform4fv", 43, -1 }, + { "glProgramUniform4d", 40, -1 }, + { "glProgramUniform4dv", 40, -1 }, + { "glProgramUniform4ui", 43, -1 }, + { "glProgramUniform4uiv", 43, -1 }, + { "glProgramUniformMatrix2fv", 43, -1 }, + { "glProgramUniformMatrix3fv", 43, -1 }, + { "glProgramUniformMatrix4fv", 43, -1 }, + { "glProgramUniformMatrix2dv", 40, -1 }, + { "glProgramUniformMatrix3dv", 40, -1 }, + { "glProgramUniformMatrix4dv", 40, -1 }, + { "glProgramUniformMatrix2x3fv", 43, -1 }, + { "glProgramUniformMatrix3x2fv", 43, -1 }, + { "glProgramUniformMatrix2x4fv", 43, -1 }, + { "glProgramUniformMatrix4x2fv", 43, -1 }, + { "glProgramUniformMatrix3x4fv", 43, -1 }, + { "glProgramUniformMatrix4x3fv", 43, -1 }, + { "glProgramUniformMatrix2x3dv", 40, -1 }, + { "glProgramUniformMatrix3x2dv", 40, -1 }, + { "glProgramUniformMatrix2x4dv", 40, -1 }, + { "glProgramUniformMatrix4x2dv", 40, -1 }, + { "glProgramUniformMatrix3x4dv", 40, -1 }, + { "glProgramUniformMatrix4x3dv", 40, -1 }, + { "glValidateProgramPipeline", 43, -1 }, + { "glGetProgramPipelineInfoLog", 43, -1 }, + + { "glVertexAttribL1d", 41, -1 }, + { "glVertexAttribL2d", 41, -1 }, + { "glVertexAttribL3d", 41, -1 }, + { "glVertexAttribL4d", 41, -1 }, + { "glVertexAttribL1dv", 41, -1 }, + { "glVertexAttribL2dv", 41, -1 }, + { "glVertexAttribL3dv", 41, -1 }, + { "glVertexAttribL4dv", 41, -1 }, + { "glVertexAttribLPointer", 41, -1 }, + { 
"glGetVertexAttribLdv", 41, -1 }, + { "glViewportArrayv", 43, -1 }, + { "glViewportIndexedf", 43, -1 }, + { "glViewportIndexedfv", 43, -1 }, + { "glScissorArrayv", 43, -1 }, + { "glScissorIndexed", 43, -1 }, + { "glScissorIndexedv", 43, -1 }, + { "glDepthRangeArrayv", 43, -1 }, + { "glDepthRangeIndexed", 43, -1 }, + +// { "glCreateSyncFromCLeventARB", 43, -1 }, // XXX: Add to xml + + { "glDrawArraysInstancedBaseInstance", 43, -1 }, + { "glDrawElementsInstancedBaseInstance", 43, -1 }, + { "glDrawElementsInstancedBaseVertexBaseInstance", 43, -1 }, + { "glDrawTransformFeedbackInstanced", 43, -1 }, + { "glDrawTransformFeedbackStreamInstanced", 43, -1 }, + { "glGetActiveAtomicCounterBufferiv", 43, -1 }, + { "glBindImageTexture", 43, -1 }, + { "glMemoryBarrier", 43, -1 }, + { "glTexStorage1D", 43, -1 }, + { "glTexStorage2D", 43, -1 }, + { "glTexStorage3D", 43, -1 }, + { "glTextureStorage1DEXT", 43, -1 }, + { "glTextureStorage2DEXT", 43, -1 }, + { "glTextureStorage3DEXT", 43, -1 }, + { "glClearBufferData", 43, -1 }, + { "glClearBufferSubData", 43, -1 }, +// { "glClearNamedBufferDataEXT", 43, -1 }, // XXX: Add to xml +// { "glClearNamedBufferSubDataEXT", 43, -1 }, // XXX: Add to xml + { "glCopyImageSubData", 43, -1 }, + { "glTextureView", 43, -1 }, + { "glBindVertexBuffer", 43, -1 }, + { "glVertexAttribFormat", 43, -1 }, + { "glVertexAttribIFormat", 43, -1 }, + { "glVertexAttribBinding", 43, -1 }, + { "glVertexBindingDivisor", 43, -1 }, +// { "glVertexArrayBindVertexBufferEXT", 43, -1 }, // XXX: Add to xml +// { "glVertexArrayVertexAttribFormatEXT", 43, -1 }, // XXX: Add to xml +// { "glVertexArrayVertexAttribIFormatEXT", 43, -1 }, // XXX: Add to xml +// { "glVertexArrayVertexAttribLFormatEXT", 43, -1 }, // XXX: Add to xml +// { "glVertexArrayVertexAttribBindingEXT", 43, -1 }, // XXX: Add to xml +// { "glVertexArrayVertexBindingDivisorEXT", 43, -1 }, // XXX: Add to xml +// { "glFramebufferParameteri", 43, -1 }, // XXX: Add to xml +// { "glGetFramebufferParameteriv", 43, -1 }, // XXX: Add to xml +// { "glNamedFramebufferParameteriEXT", 43, -1 }, // XXX: Add to xml +// { "glGetNamedFramebufferParameterivEXT", 43, -1 }, // XXX: Add to xml +// { "glGetInternalformati64v", 43, -1 }, // XXX: Add to xml + { "glInvalidateTexSubImage", 43, -1 }, + { "glInvalidateTexImage", 43, -1 }, + { "glInvalidateBufferSubData", 43, -1 }, + { "glInvalidateBufferData", 43, -1 }, + { "glInvalidateFramebuffer", 43, -1 }, + { "glInvalidateSubFramebuffer", 43, -1 }, + { "glMultiDrawArraysIndirect", 43, -1 }, + { "glMultiDrawElementsIndirect", 43, -1 }, + { "glGetProgramInterfaceiv", 43, -1 }, + { "glGetProgramResourceIndex", 43, -1 }, + { "glGetProgramResourceName", 43, -1 }, + { "glGetProgramResourceiv", 43, -1 }, + { "glGetProgramResourceLocation", 43, -1 }, + { "glGetProgramResourceLocationIndex", 43, -1 }, +// { "glShaderStorageBlockBinding", 43, -1 }, // XXX: Add to xml + { "glTexBufferRange", 43, -1 }, +// { "glTextureBufferRangeEXT", 43, -1 }, // XXX: Add to xml + { "glTexStorage2DMultisample", 43, -1 }, + { "glTexStorage3DMultisample", 43, -1 }, +// { "glTextureStorage2DMultisampleEXT", 43, -1 }, // XXX: Add to xml +// { "glTextureStorage3DMultisampleEXT", 43, -1 }, // XXX: Add to xml + /* GL_ARB_direct_state_access */ { "glCreateTransformFeedbacks", 45, -1 }, { "glTransformFeedbackBufferBase", 45, -1 }, @@ -980,6 +1754,24 @@ const struct function gl_core_functions_possible[] = { { "glGetNamedBufferParameteri64v", 45, -1 }, { "glGetNamedBufferPointerv", 45, -1 }, { "glGetNamedBufferSubData", 45, -1 }, + { 
"glCreateFramebuffers", 45, -1 }, + { "glNamedFramebufferRenderbuffer", 45, -1 }, + { "glNamedFramebufferParameteri", 45, -1 }, + { "glNamedFramebufferTexture", 45, -1 }, + { "glNamedFramebufferTextureLayer", 45, -1 }, + { "glNamedFramebufferDrawBuffer", 45, -1 }, + { "glNamedFramebufferDrawBuffers", 45, -1 }, + { "glNamedFramebufferReadBuffer", 45, -1 }, + { "glInvalidateNamedFramebufferSubData", 45, -1 }, + { "glInvalidateNamedFramebufferData", 45, -1 }, + { "glClearNamedFramebufferiv", 45, -1 }, + { "glClearNamedFramebufferuiv", 45, -1 }, + { "glClearNamedFramebufferfv", 45, -1 }, + { "glClearNamedFramebufferfi", 45, -1 }, + { "glBlitNamedFramebuffer", 45, -1 }, + { "glCheckNamedFramebufferStatus", 45, -1 }, + { "glGetNamedFramebufferParameteriv", 45, -1 }, + { "glGetNamedFramebufferAttachmentParameteriv", 45, -1 }, { "glCreateRenderbuffers", 45, -1 }, { "glNamedRenderbufferStorage", 45, -1 }, { "glNamedRenderbufferStorageMultisample", 45, -1 }, @@ -1039,9 +1831,6 @@ const struct function gl_core_functions_possible[] = { { "glGetQueryBufferObjecti64v", 45, -1 }, { "glGetQueryBufferObjectui64v", 45, -1 }, - /* GL_EXT_polygon_offset_clamp */ - { "glPolygonOffsetClampEXT", 11, -1 }, - { NULL, 0, -1 } }; @@ -1596,3 +2385,88 @@ const struct function gles3_functions_possible[] = { { NULL, 0, -1 } }; + +const struct function gles31_functions_possible[] = { + { "glDispatchCompute", 31, -1 }, + { "glDispatchComputeIndirect", 31, -1 }, + { "glDrawArraysIndirect", 31, -1 }, + { "glDrawElementsIndirect", 31, -1 }, + + // FINISHME: These two functions have not been implemented yet. They come + // FINISHME: from the ARB_framebuffer_no_attachments extension. + // { "glFramebufferParameteri", 31, -1 }, + // { "glGetFramebufferParameteriv", 31, -1 }, + + { "glGetProgramInterfaceiv", 31, -1 }, + { "glGetProgramResourceIndex", 31, -1 }, + { "glGetProgramResourceName", 31, -1 }, + { "glGetProgramResourceiv", 31, -1 }, + { "glGetProgramResourceLocation", 31, -1 }, + + // We check for the aliased EXT versions in GLES 2 + // { "glUseProgramStages", 31, -1 }, + // { "glActiveShaderProgram", 31, -1 }, + // { "glCreateShaderProgramv", 31, -1 }, + // { "glBindProgramPipeline", 31, -1 }, + // { "glDeleteProgramPipelines", 31, -1 }, + // { "glGenProgramPipelines", 31, -1 }, + // { "glIsProgramPipeline", 31, -1 }, + // { "glGetProgramPipelineiv", 31, -1 }, + // { "glProgramUniform1i", 31, -1 }, + // { "glProgramUniform2i", 31, -1 }, + // { "glProgramUniform3i", 31, -1 }, + // { "glProgramUniform4i", 31, -1 }, + // { "glProgramUniform1f", 31, -1 }, + // { "glProgramUniform2f", 31, -1 }, + // { "glProgramUniform3f", 31, -1 }, + // { "glProgramUniform4f", 31, -1 }, + // { "glProgramUniform1iv", 31, -1 }, + // { "glProgramUniform2iv", 31, -1 }, + // { "glProgramUniform3iv", 31, -1 }, + // { "glProgramUniform4iv", 31, -1 }, + // { "glProgramUniform1fv", 31, -1 }, + // { "glProgramUniform2fv", 31, -1 }, + // { "glProgramUniform3fv", 31, -1 }, + // { "glProgramUniform4fv", 31, -1 }, + // { "glProgramUniformMatrix2fv", 31, -1 }, + // { "glProgramUniformMatrix3fv", 31, -1 }, + // { "glProgramUniformMatrix4fv", 31, -1 }, + // { "glProgramUniformMatrix2x3fv", 31, -1 }, + // { "glProgramUniformMatrix3x2fv", 31, -1 }, + // { "glProgramUniformMatrix2x4fv", 31, -1 }, + // { "glProgramUniformMatrix4x2fv", 31, -1 }, + // { "glProgramUniformMatrix3x4fv", 31, -1 }, + // { "glProgramUniformMatrix4x3fv", 31, -1 }, + // { "glValidateProgramPipeline", 31, -1 }, + // { "glGetProgramPipelineInfoLog", 31, -1 }, + + // We check for the 
aliased EXT versions in GLES 3 + // { "glProgramUniform1ui", 31, -1 }, + // { "glProgramUniform2ui", 31, -1 }, + // { "glProgramUniform3ui", 31, -1 }, + // { "glProgramUniform4ui", 31, -1 }, + // { "glProgramUniform1uiv", 31, -1 }, + // { "glProgramUniform2uiv", 31, -1 }, + // { "glProgramUniform3uiv", 31, -1 }, + // { "glProgramUniform4uiv", 31, -1 }, + + { "glBindImageTexture", 31, -1 }, + { "glGetBooleani_v", 31, -1 }, + { "glMemoryBarrier", 31, -1 }, + + // FINISHME: This function has not been implemented yet. + // { "glMemoryBarrierByRegion", 31, -1 }, + + { "glTexStorage2DMultisample", 31, -1 }, + { "glGetMultisamplefv", 31, -1 }, + { "glSampleMaski", 31, -1 }, + { "glGetTexLevelParameteriv", 31, -1 }, + { "glGetTexLevelParameterfv", 31, -1 }, + { "glBindVertexBuffer", 31, -1 }, + { "glVertexAttribFormat", 31, -1 }, + { "glVertexAttribIFormat", 31, -1 }, + { "glVertexAttribBinding", 31, -1 }, + { "glVertexBindingDivisor", 31, -1 }, + + { NULL, 0, -1 }, + }; diff --git a/src/mesa/main/texenv.c b/src/mesa/main/texenv.c index ec521e6c6e5..3edafc0f776 100644 --- a/src/mesa/main/texenv.c +++ b/src/mesa/main/texenv.c @@ -646,7 +646,7 @@ _mesa_GetTexEnvfv( GLenum target, GLenum pname, GLfloat *params ) if (pname == GL_TEXTURE_ENV_COLOR) { if(ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) _mesa_update_state(ctx); - if (_mesa_get_clamp_fragment_color(ctx)) + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) COPY_4FV( params, texUnit->EnvColor ); else COPY_4FV( params, texUnit->EnvColorUnclamped ); diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 7bc1da7f805..3d85615fa45 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -222,7 +222,7 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat ) } } - if (ctx->Extensions.ARB_stencil_texturing) { + if (ctx->Extensions.ARB_texture_stencil8) { switch (internalFormat) { case GL_STENCIL_INDEX: case GL_STENCIL_INDEX1: diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index b5d42d3047f..d74134f41b1 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1709,7 +1709,7 @@ get_tex_parameterfv(struct gl_context *ctx, if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) _mesa_update_state_locked(ctx); - if (_mesa_get_clamp_fragment_color(ctx)) { + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) { params[0] = CLAMP(obj->Sampler.BorderColor.f[0], 0.0F, 1.0F); params[1] = CLAMP(obj->Sampler.BorderColor.f[1], 0.0F, 1.0F); params[2] = CLAMP(obj->Sampler.BorderColor.f[2], 0.0F, 1.0F); diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c index cd87a27d2db..6b0aed4ea1a 100644 --- a/src/mesa/main/textureview.c +++ b/src/mesa/main/textureview.c @@ -167,7 +167,7 @@ static const struct internal_format_class_info s3tc_compatible_internal_formats[ * \return VIEW_CLASS if internalformat found in table, false otherwise. 
*/ static GLenum -lookup_view_class(struct gl_context *ctx, GLenum internalformat) +lookup_view_class(const struct gl_context *ctx, GLenum internalformat) { GLuint i; @@ -176,9 +176,11 @@ lookup_view_class(struct gl_context *ctx, GLenum internalformat) return compatible_internal_formats[i].view_class; } - if (ctx->Extensions.EXT_texture_compression_s3tc && ctx->Extensions.EXT_texture_sRGB) { + if (ctx->Extensions.EXT_texture_compression_s3tc && + ctx->Extensions.EXT_texture_sRGB) { for (i = 0; i < ARRAY_SIZE(s3tc_compatible_internal_formats); i++) { - if (s3tc_compatible_internal_formats[i].internal_format == internalformat) + if (s3tc_compatible_internal_formats[i].internal_format + == internalformat) return s3tc_compatible_internal_formats[i].view_class; } } @@ -226,7 +228,8 @@ initialize_texture_fields(struct gl_context *ctx, 0, internalFormat, texFormat); } - _mesa_next_mipmap_level_size(target, 0, levelWidth, levelHeight, levelDepth, + _mesa_next_mipmap_level_size(target, 0, + levelWidth, levelHeight, levelDepth, &levelWidth, &levelHeight, &levelDepth); } @@ -320,8 +323,8 @@ target_valid(struct gl_context *ctx, GLenum origTarget, GLenum newTarget) * If an error is found, record it with _mesa_error() * \return false if any error, true otherwise. */ -GLboolean -_mesa_texture_view_compatible_format(struct gl_context *ctx, +bool +_mesa_texture_view_compatible_format(const struct gl_context *ctx, GLenum origInternalFormat, GLenum newInternalFormat) { @@ -334,15 +337,16 @@ _mesa_texture_view_compatible_format(struct gl_context *ctx, * or an INVALID_OPERATION error is generated. */ if (origInternalFormat == newInternalFormat) - return GL_TRUE; + return true; origViewClass = lookup_view_class(ctx, origInternalFormat); newViewClass = lookup_view_class(ctx, newInternalFormat); if ((origViewClass == newViewClass) && origViewClass != false) - return GL_TRUE; + return true; - return GL_FALSE; + return false; } + /** * Helper function for TexStorage and teximagemultisample to set immutable * texture state needed by ARB_texture_view. @@ -357,17 +361,19 @@ _mesa_set_texture_view_state(struct gl_context *ctx, /* Get a reference to what will become this View's base level */ texImage = _mesa_select_tex_image(texObj, target, 0); - /* When an immutable texture is created via glTexStorage or glTexImageMultisample, + /* When an immutable texture is created via glTexStorage or + * glTexImageMultisample, * TEXTURE_IMMUTABLE_FORMAT becomes TRUE. * TEXTURE_IMMUTABLE_LEVELS and TEXTURE_VIEW_NUM_LEVELS become levels. * If the texture target is TEXTURE_1D_ARRAY then * TEXTURE_VIEW_NUM_LAYERS becomes height. * If the texture target is TEXTURE_2D_ARRAY, TEXTURE_CUBE_MAP_ARRAY, - * or TEXTURE_2D_MULTISAMPLE_ARRAY then TEXTURE_VIEW_NUM_LAYERS becomes depth. + * or TEXTURE_2D_MULTISAMPLE_ARRAY then TEXTURE_VIEW_NUM_LAYERS becomes + * depth. * If the texture target is TEXTURE_CUBE_MAP, then * TEXTURE_VIEW_NUM_LAYERS becomes 6. * For any other texture target, TEXTURE_VIEW_NUM_LAYERS becomes 1. - * + * * ARB_texture_multisample: Multisample textures do * not have multiple image levels. 
*/ @@ -401,7 +407,6 @@ _mesa_set_texture_view_state(struct gl_context *ctx, case GL_TEXTURE_CUBE_MAP: texObj->NumLayers = 6; break; - } } @@ -435,16 +440,20 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, minlevel, numlevels, minlayer, numlayers); if (origtexture == 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(origtexture = %u)", origtexture); + _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(origtexture = %u)", + origtexture); return; } /* Need original texture information to validate arguments */ origTexObj = _mesa_lookup_texture(ctx, origtexture); - /* If <origtexture> is not the name of a texture, INVALID_VALUE is generated. */ + /* If <origtexture> is not the name of a texture, INVALID_VALUE + * is generated. + */ if (!origTexObj) { - _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(origtexture = %u)", origtexture); + _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(origtexture = %u)", + origtexture); return; } @@ -452,7 +461,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, * INVALID_OPERATION is generated. */ if (!origTexObj->Immutable) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(origtexture not immutable)"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureView(origtexture not immutable)"); return; } @@ -467,7 +477,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, */ texObj = _mesa_lookup_texture(ctx, texture); if (texObj == NULL) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(texture = %u non-gen name)", texture); + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureView(texture = %u non-gen name)", texture); return; } @@ -475,7 +486,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, * the error INVALID_OPERATION is generated. */ if (texObj->Target) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(texture = %u already bound)", texture); + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureView(texture = %u already bound)", texture); return; } @@ -484,33 +496,35 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, return; /* error was recorded */ } - /* minlevel and minlayer are relative to the view of origtexture + /* minlevel and minlayer are relative to the view of origtexture. * If minlevel or minlayer is greater than level or layer, respectively, - * of origtexture return INVALID_VALUE. + * return INVALID_VALUE. 
*/ newViewMinLevel = origTexObj->MinLevel + minlevel; newViewMinLayer = origTexObj->MinLayer + minlayer; if (newViewMinLevel >= (origTexObj->MinLevel + origTexObj->NumLevels)) { _mesa_error(ctx, GL_INVALID_VALUE, - "glTextureView(new minlevel (%d) > orig minlevel (%d) + orig numlevels (%d))", + "glTextureView(new minlevel (%d) > orig minlevel (%d)" + " + orig numlevels (%d))", newViewMinLevel, origTexObj->MinLevel, origTexObj->NumLevels); return; } if (newViewMinLayer >= (origTexObj->MinLayer + origTexObj->NumLayers)) { _mesa_error(ctx, GL_INVALID_VALUE, - "glTextureView(new minlayer (%d) > orig minlayer (%d) + orig numlayers (%d))", + "glTextureView(new minlayer (%d) > orig minlayer (%d)" + " + orig numlayers (%d))", newViewMinLayer, origTexObj->MinLayer, origTexObj->NumLayers); return; } if (!_mesa_texture_view_compatible_format(ctx, - origTexObj->Image[0][0]->InternalFormat, - internalformat)) { + origTexObj->Image[0][0]->InternalFormat, + internalformat)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glTextureView(internalformat %s not compatible with origtexture %s)", - _mesa_lookup_enum_by_nr(internalformat), - _mesa_lookup_enum_by_nr(origTexObj->Image[0][0]->InternalFormat)); + "glTextureView(internalformat %s not compatible with origtexture %s)", + _mesa_lookup_enum_by_nr(internalformat), + _mesa_lookup_enum_by_nr(origTexObj->Image[0][0]->InternalFormat)); return; } @@ -569,14 +583,16 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, dimensionsOK = _mesa_legal_texture_dimensions(ctx, target, 0, width, height, depth, 0); if (!dimensionsOK) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(invalid width or height or depth)"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureView(invalid width or height or depth)"); return; } sizeOK = ctx->Driver.TestProxyTexImage(ctx, target, 0, texFormat, width, height, depth, 0); if (!sizeOK) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(invalid texture size)"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureView(invalid texture size)"); return; } @@ -591,17 +607,19 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, case GL_TEXTURE_RECTANGLE: case GL_TEXTURE_2D_MULTISAMPLE: if (numlayers != 1) { - _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(numlayers %d != 1)", numlayers); + _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(numlayers %d != 1)", + numlayers); return; } break; case GL_TEXTURE_CUBE_MAP: - /* If the new texture's target is TEXTURE_CUBE_MAP, the clamped <numlayers> - * must be equal to 6. + /* If the new texture's target is TEXTURE_CUBE_MAP, the clamped + * <numlayers> must be equal to 6. 
*/ if (newViewNumLayers != 6) { - _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(clamped numlayers %d != 6)", + _mesa_error(ctx, GL_INVALID_VALUE, + "glTextureView(clamped numlayers %d != 6)", newViewNumLayers); return; } @@ -615,7 +633,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, */ if ((newViewNumLayers % 6) != 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "glTextureView(clamped numlayers %d is not a multiple of 6)", + "glTextureView(clamped numlayers %d is not" + " a multiple of 6)", newViewNumLayers); return; } @@ -628,7 +647,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, */ if ((target == GL_TEXTURE_CUBE_MAP || target == GL_TEXTURE_CUBE_MAP_ARRAY) && (origTexImage->Width != origTexImage->Height)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(origtexture width (%d) != height (%d))", + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureView(origtexture width (%d) != height (%d))", origTexImage->Width, origTexImage->Height); return; } @@ -662,7 +682,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, texObj->ImmutableLevels = origTexObj->ImmutableLevels; texObj->Target = target; - if (ctx->Driver.TextureView != NULL && !ctx->Driver.TextureView(ctx, texObj, origTexObj)) { + if (ctx->Driver.TextureView != NULL && + !ctx->Driver.TextureView(ctx, texObj, origTexObj)) { return; /* driver recorded error */ } } diff --git a/src/mesa/main/textureview.h b/src/mesa/main/textureview.h index 549a13cd809..59e24b68dd0 100644 --- a/src/mesa/main/textureview.h +++ b/src/mesa/main/textureview.h @@ -29,8 +29,8 @@ #ifndef TEXTUREVIEW_H #define TEXTUREVIEW_H -GLboolean -_mesa_texture_view_compatible_format(struct gl_context *ctx, +bool +_mesa_texture_view_compatible_format(const struct gl_context *ctx, GLenum origInternalFormat, GLenum newInternalFormat); @@ -41,7 +41,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, GLuint minlayer, GLuint numlayers); extern void -_mesa_set_texture_view_state(struct gl_context *ctx, struct gl_texture_object *texObj, - GLenum target, GLuint levels); +_mesa_set_texture_view_state(struct gl_context *ctx, + struct gl_texture_object *texObj, + GLenum target, GLuint levels); #endif /* TEXTUREVIEW_H */ diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index 728bd1bac10..cab5083e81b 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -237,6 +237,13 @@ validate_uniform_parameters(struct gl_context *ctx, struct gl_uniform_storage *const uni = shProg->UniformRemapTable[location]; + /* Even though no location is assigned to a built-in uniform and this + * function should already have returned NULL, this test makes it explicit + * that we are not allowing to update the value of a built-in. + */ + if (uni->builtin) + return NULL; + if (uni->array_elements == 0) { if (count > 1) { _mesa_error(ctx, GL_INVALID_OPERATION, @@ -1028,6 +1035,10 @@ _mesa_get_uniform_location(struct gl_shader_program *shProg, if (!found) return GL_INVALID_INDEX; + /* If the uniform is built-in, fail. */ + if (shProg->UniformStorage[location].builtin) + return GL_INVALID_INDEX; + /* If the uniform is an array, fail if the index is out of bounds. * (A negative index is caught above.) This also fails if the uniform * is not an array, but the user is trying to index it, because @@ -1047,7 +1058,7 @@ _mesa_sampler_uniforms_are_valid(const struct gl_shader_program *shProg, char *errMsg, size_t errMsgLength) { /* Shader does not have samplers. 
*/ - if (shProg->NumUserUniformStorage == 0) + if (shProg->NumUniformStorage == 0) return true; if (!shProg->SamplersValidated) { @@ -1087,7 +1098,7 @@ _mesa_sampler_uniforms_pipeline_are_valid(struct gl_pipeline_object *pipeline) if (!shProg[idx]) continue; - for (unsigned i = 0; i < shProg[idx]->NumUserUniformStorage; i++) { + for (unsigned i = 0; i < shProg[idx]->NumUniformStorage; i++) { const struct gl_uniform_storage *const storage = &shProg[idx]->UniformStorage[i]; const glsl_type *const t = (storage->type->is_array()) diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h index 55fa2357e38..bd7b05e207a 100644 --- a/src/mesa/main/uniforms.h +++ b/src/mesa/main/uniforms.h @@ -343,10 +343,6 @@ void GLAPIENTRY _mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); -long -_mesa_parse_program_resource_name(const GLchar *name, - const GLchar **out_base_name_end); - unsigned _mesa_get_uniform_location(struct gl_shader_program *shProg, const GLchar *name, unsigned *offset); diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c index 7389037ae85..ebdd9eaf02e 100644 --- a/src/mesa/main/varray.c +++ b/src/mesa/main/varray.c @@ -2309,10 +2309,10 @@ print_array(const char *name, GLint index, const struct gl_client_array *array) fprintf(stderr, " %s[%d]: ", name, index); else fprintf(stderr, " %s: ", name); - fprintf(stderr, "Ptr=%p, Type=0x%x, Size=%d, ElemSize=%u, Stride=%d, Buffer=%u(Size %lu)\n", - array->Ptr, array->Type, array->Size, - array->_ElementSize, array->StrideB, - array->BufferObj->Name, (unsigned long) array->BufferObj->Size); + fprintf(stderr, "Ptr=%p, Type=%s, Size=%d, ElemSize=%u, Stride=%d, Buffer=%u(Size %lu)\n", + array->Ptr, _mesa_lookup_enum_by_nr(array->Type), array->Size, + array->_ElementSize, array->StrideB, array->BufferObj->Name, + (unsigned long) array->BufferObj->Size); } diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index 699a0de46c2..8bc00ace5c4 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -51,31 +51,51 @@ check_for_ending(const char *string, const char *ending) * fwd_context is only valid if version > 0 */ static void -get_gl_override(int *version, bool *fwd_context, bool *compat_context) +get_gl_override(gl_api api, int *version, bool *fwd_context, + bool *compat_context) { - const char *env_var = "MESA_GL_VERSION_OVERRIDE"; + const char *env_var = (api == API_OPENGL_CORE || api == API_OPENGL_COMPAT) + ? 
"MESA_GL_VERSION_OVERRIDE" : "MESA_GLES_VERSION_OVERRIDE"; const char *version_str; int major, minor, n; - static int override_version = -1; - static bool fc_suffix = false; - static bool compat_suffix = false; + static struct override_info { + int version; + bool fc_suffix; + bool compat_suffix; + } override[] = { + { -1, false, false}, + { -1, false, false}, + { -1, false, false}, + { -1, false, false}, + }; - if (override_version < 0) { - override_version = 0; + STATIC_ASSERT(ARRAY_SIZE(override) == API_OPENGL_LAST + 1); + + if (api == API_OPENGLES) + goto exit; + + if (override[api].version < 0) { + override[api].version = 0; version_str = getenv(env_var); if (version_str) { - fc_suffix = check_for_ending(version_str, "FC"); - compat_suffix = check_for_ending(version_str, "COMPAT"); + override[api].fc_suffix = check_for_ending(version_str, "FC"); + override[api].compat_suffix = check_for_ending(version_str, "COMPAT"); n = sscanf(version_str, "%u.%u", &major, &minor); if (n != 2) { fprintf(stderr, "error: invalid value for %s: %s\n", env_var, version_str); - override_version = 0; + override[api].version = 0; } else { - override_version = major * 10 + minor; - if (override_version < 30 && fc_suffix) { + override[api].version = major * 10 + minor; + + /* There is no such thing as compatibility or forward-compatible for + * OpenGL ES 2.0 or 3.x APIs. + */ + if ((override[api].version < 30 && override[api].fc_suffix) || + (api == API_OPENGLES2 && (override[api].fc_suffix || + override[api].compat_suffix))) { fprintf(stderr, "error: invalid value for %s: %s\n", env_var, version_str); } @@ -83,9 +103,10 @@ get_gl_override(int *version, bool *fwd_context, bool *compat_context) } } - *version = override_version; - *fwd_context = fc_suffix; - *compat_context = compat_suffix; +exit: + *version = override[api].version; + *fwd_context = override[api].fc_suffix; + *compat_context = override[api].compat_suffix; } /** @@ -130,18 +151,26 @@ _mesa_override_gl_version_contextless(struct gl_constants *consts, int version; bool fwd_context, compat_context; - get_gl_override(&version, &fwd_context, &compat_context); + get_gl_override(*apiOut, &version, &fwd_context, &compat_context); if (version > 0) { *versionOut = version; - if (version >= 30 && fwd_context) { - *apiOut = API_OPENGL_CORE; - consts->ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT; - } else if (version >= 31 && !compat_context) { - *apiOut = API_OPENGL_CORE; - } else { - *apiOut = API_OPENGL_COMPAT; + + /* If the API is a desktop API, adjust the context flags. We may also + * need to modify the API depending on the version. For example, Mesa + * does not support a GL 3.3 compatibility profile. + */ + if (*apiOut == API_OPENGL_CORE || *apiOut == API_OPENGL_COMPAT) { + if (version >= 30 && fwd_context) { + *apiOut = API_OPENGL_CORE; + consts->ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT; + } else if (version >= 31 && !compat_context) { + *apiOut = API_OPENGL_CORE; + } else { + *apiOut = API_OPENGL_COMPAT; + } } + return true; } return false; @@ -157,22 +186,6 @@ _mesa_override_gl_version(struct gl_context *ctx) } /** - * Returns the gl override value - * - * version > 0 indicates there is an override requested - */ -int -_mesa_get_gl_version_override(void) -{ - int version; - bool fwd_context, compat_context; - - get_gl_override(&version, &fwd_context, &compat_context); - - return version; -} - -/** * Override the context's GLSL version if the environment variable * MESA_GLSL_VERSION_OVERRIDE is set. 
Valid values for * MESA_GLSL_VERSION_OVERRIDE are integers, such as "130". @@ -433,7 +446,23 @@ compute_version_es2(const struct gl_extensions *extensions) extensions->EXT_texture_snorm && extensions->NV_primitive_restart && extensions->OES_depth_texture_cube_map); - if (ver_3_0) { + const bool ver_3_1 = (ver_3_0 && + extensions->ARB_arrays_of_arrays && + extensions->ARB_compute_shader && + extensions->ARB_draw_indirect && + false /*extensions->ARB_framebuffer_no_attachments*/ && + extensions->ARB_shader_atomic_counters && + extensions->ARB_shader_image_load_store && + false /*extensions->ARB_shader_image_size*/ && + false /*extensions->ARB_shader_storage_buffer_object*/ && + extensions->ARB_shading_language_packing && + extensions->ARB_stencil_texturing && + extensions->ARB_gpu_shader5 && + extensions->EXT_shader_integer_mix); + + if (ver_3_1) { + return 31; + } else if (ver_3_0) { return 30; } else if (ver_2_0) { return 20; diff --git a/src/mesa/main/version.h b/src/mesa/main/version.h index 450a0e31d3d..ee7cb7501eb 100644 --- a/src/mesa/main/version.h +++ b/src/mesa/main/version.h @@ -47,7 +47,4 @@ _mesa_override_gl_version(struct gl_context *ctx); extern void _mesa_override_glsl_version(struct gl_constants *consts); -extern int -_mesa_get_gl_version_override(void); - #endif /* VERSION_H */ diff --git a/src/mesa/main/vtxfmt.c b/src/mesa/main/vtxfmt.c index d7ef7e278cd..81bf4c589ea 100644 --- a/src/mesa/main/vtxfmt.c +++ b/src/mesa/main/vtxfmt.c @@ -207,7 +207,7 @@ install_vtxfmt(struct gl_context *ctx, struct _glapi_table *tab, SET_VertexAttribP4uiv(tab, vfmt->VertexAttribP4uiv); } - if (_mesa_is_desktop_gl(ctx)) { + if (ctx->API == API_OPENGL_CORE) { SET_VertexAttribL1d(tab, vfmt->VertexAttribL1d); SET_VertexAttribL2d(tab, vfmt->VertexAttribL2d); SET_VertexAttribL3d(tab, vfmt->VertexAttribL3d); diff --git a/src/mesa/program/dummy_errors.c b/src/mesa/program/dummy_errors.c new file mode 100644 index 00000000000..d69f54d1d05 --- /dev/null +++ b/src/mesa/program/dummy_errors.c @@ -0,0 +1,30 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include <stdio.h> +#include "main/errors.h" + +void +_mesa_error_no_memory(const char *caller) +{ + fprintf(stderr, "Mesa error: out of memory in %s", caller); +} diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index fceed712bdb..3bffe90ff1f 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -262,6 +262,7 @@ public: virtual void visit(ir_if *); virtual void visit(ir_emit_vertex *); virtual void visit(ir_end_primitive *); + virtual void visit(ir_barrier *); /*@}*/ src_reg result; @@ -405,7 +406,7 @@ ir_to_mesa_visitor::emit_dp(ir_instruction *ir, dst_reg dst, src_reg src0, src_reg src1, unsigned elements) { - static const gl_inst_opcode dot_opcodes[] = { + static const enum prog_opcode dot_opcodes[] = { OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 }; @@ -2118,6 +2119,12 @@ ir_to_mesa_visitor::visit(ir_end_primitive *) assert(!"Geometry shaders not supported."); } +void +ir_to_mesa_visitor::visit(ir_barrier *) +{ + unreachable("GLSL barrier() not supported."); +} + ir_to_mesa_visitor::ir_to_mesa_visitor() { result.file = PROGRAM_UNDEFINED; @@ -2407,9 +2414,14 @@ _mesa_associate_uniform_storage(struct gl_context *ctx, if (!found) continue; + struct gl_uniform_storage *storage = + &shader_program->UniformStorage[location]; + + /* Do not associate any uniform storage to built-in uniforms */ + if (storage->builtin) + continue; + if (location != last_location) { - struct gl_uniform_storage *storage = - &shader_program->UniformStorage[location]; enum gl_uniform_driver_format format = uniform_native; unsigned columns = 0; @@ -2722,7 +2734,7 @@ get_mesa_program(struct gl_context *ctx, mesa_inst->Opcode = inst->op; mesa_inst->CondUpdate = inst->cond_update; if (inst->saturate) - mesa_inst->SaturateMode = SATURATE_ZERO_ONE; + mesa_inst->Saturate = GL_TRUE; mesa_inst->DstReg.File = inst->dst.file; mesa_inst->DstReg.Index = inst->dst.index; mesa_inst->DstReg.CondMask = inst->dst.cond_mask; diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index 16e8e340d8d..46260b54882 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -397,7 +397,7 @@ store_vector4(const struct prog_instruction *inst, struct gl_program_machine *machine, const GLfloat value[4]) { const struct prog_dst_register *dstReg = &(inst->DstReg); - const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE; + const GLboolean clamp = inst->Saturate; GLuint writeMask = dstReg->WriteMask; GLfloat clampedValue[4]; GLfloat *dst = get_dst_register_pointer(dstReg, machine); diff --git a/src/mesa/program/prog_instruction.c b/src/mesa/program/prog_instruction.c index f9ebe4e8fd2..21ef35337f6 100644 --- a/src/mesa/program/prog_instruction.c +++ b/src/mesa/program/prog_instruction.c @@ -55,7 +55,7 @@ _mesa_init_instructions(struct prog_instruction *inst, GLuint count) inst[i].DstReg.CondMask = COND_TR; inst[i].DstReg.CondSwizzle = SWIZZLE_NOOP; - inst[i].SaturateMode = SATURATE_OFF; + inst[i].Saturate = GL_FALSE; inst[i].Precision = FLOAT32; } } @@ -114,7 +114,7 @@ _mesa_free_instructions(struct prog_instruction *inst, GLuint count) */ struct instruction_info { - gl_inst_opcode Opcode; + enum prog_opcode Opcode; const char *Name; GLuint NumSrcRegs; GLuint NumDstRegs; @@ -198,7 +198,7 @@ static const struct instruction_info InstInfo[MAX_OPCODE] = { * Return the number of src registers for the given instruction/opcode. 
*/ GLuint -_mesa_num_inst_src_regs(gl_inst_opcode opcode) +_mesa_num_inst_src_regs(enum prog_opcode opcode) { assert(opcode < MAX_OPCODE); assert(opcode == InstInfo[opcode].Opcode); @@ -211,7 +211,7 @@ _mesa_num_inst_src_regs(gl_inst_opcode opcode) * Return the number of dst registers for the given instruction/opcode. */ GLuint -_mesa_num_inst_dst_regs(gl_inst_opcode opcode) +_mesa_num_inst_dst_regs(enum prog_opcode opcode) { assert(opcode < MAX_OPCODE); assert(opcode == InstInfo[opcode].Opcode); @@ -221,7 +221,7 @@ _mesa_num_inst_dst_regs(gl_inst_opcode opcode) GLboolean -_mesa_is_tex_instruction(gl_inst_opcode opcode) +_mesa_is_tex_instruction(enum prog_opcode opcode) { return (opcode == OPCODE_TEX || opcode == OPCODE_TXB || @@ -285,7 +285,7 @@ _mesa_check_soa_dependencies(const struct prog_instruction *inst) * Return string name for given program opcode. */ const char * -_mesa_opcode_string(gl_inst_opcode opcode) +_mesa_opcode_string(enum prog_opcode opcode) { if (opcode < MAX_OPCODE) return InstInfo[opcode].Name; diff --git a/src/mesa/program/prog_instruction.h b/src/mesa/program/prog_instruction.h index 96da198f86d..d56f96cfaa1 100644 --- a/src/mesa/program/prog_instruction.h +++ b/src/mesa/program/prog_instruction.h @@ -118,15 +118,6 @@ /** - * Saturation modes when storing values. - */ -/*@{*/ -#define SATURATE_OFF 0 -#define SATURATE_ZERO_ONE 1 -/*@}*/ - - -/** * Per-component negation masks */ /*@{*/ @@ -143,7 +134,7 @@ /** * Program instruction opcodes for vertex, fragment and geometry programs. */ -typedef enum prog_opcode { +enum prog_opcode { /* ARB_vp ARB_fp NV_vp NV_fp GLSL */ /*------------------------------------------*/ OPCODE_NOP = 0, /* X */ @@ -213,7 +204,7 @@ typedef enum prog_opcode { OPCODE_TRUNC, /* X */ OPCODE_XPD, /* X X */ MAX_OPCODE -} gl_inst_opcode; +}; /** @@ -300,7 +291,7 @@ struct prog_dst_register */ struct prog_instruction { - gl_inst_opcode Opcode; + enum prog_opcode Opcode; struct prog_src_register SrcReg[3]; struct prog_dst_register DstReg; @@ -327,15 +318,12 @@ struct prog_instruction GLuint CondDst:1; /** - * Saturate each value of the vectored result to the range [0,1] or the - * range [-1,1]. \c SSAT mode (i.e., saturation to the range [-1,1]) is - * only available in NV_fragment_program2 mode. - * Value is one of the SATURATE_* tokens. + * Saturate each value of the vectored result to the range [0,1]. * * \since * NV_fragment_program_option, NV_vertex_program3. */ - GLuint SaturateMode:2; + GLuint Saturate:1; /** * Per-instruction selectable precision: FLOAT32, FLOAT16, FIXED12. 
@@ -368,9 +356,6 @@ struct prog_instruction */ GLint BranchTarget; - /** for driver use (try to remove someday) */ - GLint Aux; - /** for debugging purposes */ const char *Comment; }; @@ -394,19 +379,19 @@ extern void _mesa_free_instructions(struct prog_instruction *inst, GLuint count); extern GLuint -_mesa_num_inst_src_regs(gl_inst_opcode opcode); +_mesa_num_inst_src_regs(enum prog_opcode opcode); extern GLuint -_mesa_num_inst_dst_regs(gl_inst_opcode opcode); +_mesa_num_inst_dst_regs(enum prog_opcode opcode); extern GLboolean -_mesa_is_tex_instruction(gl_inst_opcode opcode); +_mesa_is_tex_instruction(enum prog_opcode opcode); extern GLboolean _mesa_check_soa_dependencies(const struct prog_instruction *inst); extern const char * -_mesa_opcode_string(gl_inst_opcode opcode); +_mesa_opcode_string(enum prog_opcode opcode); #ifdef __cplusplus diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c index 6d4485acb65..f9e9035fc3e 100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@ -478,7 +478,7 @@ can_upward_mov_be_modifed(const struct prog_instruction *mov) return can_downward_mov_be_modifed(mov) && mov->DstReg.File == PROGRAM_TEMPORARY && - mov->SaturateMode == SATURATE_OFF; + !mov->Saturate; } @@ -653,7 +653,7 @@ _mesa_merge_mov_into_inst(struct prog_instruction *inst, if (mask != (inst->DstReg.WriteMask & mask)) return GL_FALSE; - inst->SaturateMode |= mov->SaturateMode; + inst->Saturate |= mov->Saturate; /* Depending on the instruction, we may need to recompute the swizzles. * Also, some other instructions (like TEX) are not linear. We will only diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c index d588d07ffe4..e4faa63c06f 100644 --- a/src/mesa/program/prog_print.c +++ b/src/mesa/program/prog_print.c @@ -600,7 +600,7 @@ _mesa_fprint_alu_instruction(FILE *f, fprintf(f, ".C"); /* frag prog only */ - if (inst->SaturateMode == SATURATE_ZERO_ONE) + if (inst->Saturate) fprintf(f, "_SAT"); fprintf(f, " "); @@ -658,7 +658,7 @@ _mesa_fprint_instruction_opt(FILE *f, switch (inst->Opcode) { case OPCODE_SWZ: fprintf(f, "SWZ"); - if (inst->SaturateMode == SATURATE_ZERO_ONE) + if (inst->Saturate) fprintf(f, "_SAT"); fprintf(f, " "); fprint_dst_reg(f, &inst->DstReg, mode, prog); @@ -675,7 +675,7 @@ _mesa_fprint_instruction_opt(FILE *f, case OPCODE_TXB: case OPCODE_TXD: fprintf(f, "%s", _mesa_opcode_string(inst->Opcode)); - if (inst->SaturateMode == SATURATE_ZERO_ONE) + if (inst->Saturate) fprintf(f, "_SAT"); fprintf(f, " "); fprint_dst_reg(f, &inst->DstReg, mode, prog); @@ -864,7 +864,7 @@ _mesa_fprint_program_opt(FILE *f, else fprintf(f, "# Fragment Program/Shader %u\n", prog->Id); break; - case MESA_GEOMETRY_PROGRAM: + case GL_GEOMETRY_PROGRAM_NV: fprintf(f, "# Geometry Shader\n"); } diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c index 0c0c87faa28..bdb335e4ba3 100644 --- a/src/mesa/program/prog_statevars.c +++ b/src/mesa/program/prog_statevars.c @@ -244,14 +244,14 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[], { /* state[1] is the texture unit */ const GLuint unit = (GLuint) state[1]; - if (_mesa_get_clamp_fragment_color(ctx)) + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) COPY_4V(value, ctx->Texture.Unit[unit].EnvColor); else COPY_4V(value, ctx->Texture.Unit[unit].EnvColorUnclamped); } return; case STATE_FOG_COLOR: - if (_mesa_get_clamp_fragment_color(ctx)) + if (_mesa_get_clamp_fragment_color(ctx, ctx->DrawBuffer)) COPY_4V(value, 
ctx->Fog.Color); else COPY_4V(value, ctx->Fog.ColorUnclamped); diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index 6c5fa51ec61..d54f934247d 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -47,6 +47,7 @@ struct ptn_compile { nir_builder build; bool error; + nir_variable *parameters; nir_variable *input_vars[VARYING_SLOT_MAX]; nir_variable *output_vars[VARYING_SLOT_MAX]; nir_register **output_regs; @@ -112,21 +113,6 @@ ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst) return dest; } -/** - * Multiply the contents of the ADDR register by 4 to convert from the number - * of vec4s to the number of floating point components. - */ -static nir_ssa_def * -ptn_addr_reg_value(struct ptn_compile *c) -{ - nir_builder *b = &c->build; - nir_alu_src src; - memset(&src, 0, sizeof(src)); - src.src = nir_src_for_reg(c->addr_reg); - - return nir_imul(b, nir_fmov_alu(b, src, 1), nir_imm_int(b, 4)); -} - static nir_ssa_def * ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) { @@ -180,27 +166,38 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) } /* FALLTHROUGH */ case PROGRAM_STATE_VAR: { - nir_intrinsic_op load_op = - prog_src->RelAddr ? nir_intrinsic_load_uniform_indirect : - nir_intrinsic_load_uniform; - nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, load_op); + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); load->num_components = 4; - /* Multiply src->Index by 4 to scale from # of vec4s to components. */ - load->const_index[0] = 4 * prog_src->Index; - load->const_index[1] = 1; + load->variables[0] = nir_deref_var_create(load, c->parameters); + nir_deref_array *deref_arr = + nir_deref_array_create(load->variables[0]); + deref_arr->deref.type = glsl_vec4_type(); + load->variables[0]->deref.child = &deref_arr->deref; if (prog_src->RelAddr) { - nir_ssa_def *reladdr = ptn_addr_reg_value(c); + deref_arr->deref_array_type = nir_deref_array_type_indirect; + + nir_alu_src addr_src = { NIR_SRC_INIT }; + addr_src.src = nir_src_for_reg(c->addr_reg); + nir_ssa_def *reladdr = nir_imov_alu(b, addr_src, 1); + if (prog_src->Index < 0) { /* This is a negative offset which should be added to the address * register's value. 
*/ - reladdr = nir_iadd(b, reladdr, nir_imm_int(b, load->const_index[0])); - load->const_index[0] = 0; + reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index)); + + deref_arr->base_offset = 0; + } else { + deref_arr->base_offset = prog_src->Index; } - load->src[0] = nir_src_for_ssa(reladdr); + deref_arr->indirect = nir_src_for_ssa(reladdr); + } else { + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->base_offset = prog_src->Index; } nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); @@ -700,7 +697,7 @@ static const nir_op op_trans[MAX_OPCODE] = { [OPCODE_ADD] = nir_op_fadd, [OPCODE_ARL] = 0, [OPCODE_CMP] = 0, - [OPCODE_COS] = nir_op_fcos, + [OPCODE_COS] = 0, [OPCODE_DDX] = nir_op_fddx, [OPCODE_DDY] = nir_op_fddy, [OPCODE_DP2] = 0, @@ -709,11 +706,11 @@ static const nir_op op_trans[MAX_OPCODE] = { [OPCODE_DPH] = 0, [OPCODE_DST] = 0, [OPCODE_END] = 0, - [OPCODE_EX2] = nir_op_fexp2, + [OPCODE_EX2] = 0, [OPCODE_EXP] = 0, [OPCODE_FLR] = nir_op_ffloor, [OPCODE_FRC] = nir_op_ffract, - [OPCODE_LG2] = nir_op_flog2, + [OPCODE_LG2] = 0, [OPCODE_LIT] = 0, [OPCODE_LOG] = 0, [OPCODE_LRP] = 0, @@ -722,15 +719,15 @@ static const nir_op op_trans[MAX_OPCODE] = { [OPCODE_MIN] = nir_op_fmin, [OPCODE_MOV] = nir_op_fmov, [OPCODE_MUL] = nir_op_fmul, - [OPCODE_POW] = nir_op_fpow, - [OPCODE_RCP] = nir_op_frcp, + [OPCODE_POW] = 0, + [OPCODE_RCP] = 0, - [OPCODE_RSQ] = nir_op_frsq, + [OPCODE_RSQ] = 0, [OPCODE_SCS] = 0, [OPCODE_SEQ] = 0, [OPCODE_SGE] = 0, [OPCODE_SGT] = 0, - [OPCODE_SIN] = nir_op_fsin, + [OPCODE_SIN] = 0, [OPCODE_SLE] = 0, [OPCODE_SLT] = 0, [OPCODE_SNE] = 0, @@ -767,7 +764,8 @@ ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst) switch (op) { case OPCODE_RSQ: - ptn_move_dest(b, dest, nir_frsq(b, ptn_channel(b, src[0], X))); + ptn_move_dest(b, dest, + nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X)))); break; case OPCODE_RCP: @@ -894,7 +892,7 @@ ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst) break; default: - if (op_trans[op] != 0 || op == OPCODE_MOV) { + if (op_trans[op] != 0) { ptn_alu(b, op_trans[op], dest, src); } else { fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op)); @@ -903,8 +901,8 @@ ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst) break; } - if (prog_inst->SaturateMode) { - assert(prog_inst->SaturateMode == SATURATE_ZERO_ONE); + if (prog_inst->Saturate) { + assert(prog_inst->Saturate); assert(!dest.dest.is_ssa); ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest))); } @@ -926,10 +924,23 @@ ptn_add_output_stores(struct ptn_compile *c) foreach_list_typed(nir_variable, var, node, &b->shader->outputs) { nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); - store->num_components = 4; + store->num_components = glsl_get_vector_elements(var->type); store->variables[0] = nir_deref_var_create(store, c->output_vars[var->data.location]); - store->src[0].reg.reg = c->output_regs[var->data.location]; + + if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && + var->data.location == FRAG_RESULT_DEPTH) { + /* result.depth has this strange convention of being the .z component of + * a vec4 with undefined .xyw components. We resolve it to a scalar, to + * match GLSL's gl_FragDepth and the expectations of most backends. 
+ */ + nir_alu_src alu_src = { NIR_SRC_INIT }; + alu_src.src = nir_src_for_reg(c->output_regs[FRAG_RESULT_DEPTH]); + alu_src.swizzle[0] = SWIZZLE_Z; + store->src[0] = nir_src_for_ssa(nir_fmov_alu(b, alu_src, 1)); + } else { + store->src[0].reg.reg = c->output_regs[var->data.location]; + } nir_instr_insert_after_cf_list(c->build.cf_node_list, &store->instr); } } @@ -1022,7 +1033,10 @@ setup_registers_and_variables(struct ptn_compile *c) reg->num_components = 4; nir_variable *var = rzalloc(shader, nir_variable); - var->type = glsl_vec4_type(); + if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH) + var->type = glsl_float_type(); + else + var->type = glsl_vec4_type(); var->data.mode = nir_var_shader_out; var->name = ralloc_asprintf(var, "out_%d", i); @@ -1057,13 +1071,11 @@ setup_registers_and_variables(struct ptn_compile *c) } reg->num_components = 1; c->addr_reg = reg; - - /* Set the number of uniforms */ - shader->num_uniforms = 4 * c->prog->Parameters->NumParameters; } struct nir_shader * -prog_to_nir(const struct gl_program *prog, const nir_shader_compiler_options *options) +prog_to_nir(const struct gl_program *prog, + const nir_shader_compiler_options *options) { struct ptn_compile *c; struct nir_shader *s; @@ -1076,6 +1088,14 @@ prog_to_nir(const struct gl_program *prog, const nir_shader_compiler_options *op goto fail; c->prog = prog; + c->parameters = rzalloc(s, nir_variable); + c->parameters->type = glsl_array_type(glsl_vec4_type(), + prog->Parameters->NumParameters); + c->parameters->name = "parameters"; + c->parameters->data.read_only = true; + c->parameters->data.mode = nir_var_uniform; + exec_list_push_tail(&s->uniforms, &c->parameters->node); + nir_function *func = nir_function_create(s, "main"); nir_function_overload *overload = nir_function_overload_create(func); nir_function_impl *impl = nir_function_impl_create(overload); diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index fb61f4d360d..c13e61b1630 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -97,13 +97,6 @@ _mesa_init_program(struct gl_context *ctx) assert(ctx->FragmentProgram.Current); ctx->FragmentProgram.Cache = _mesa_new_program_cache(); - ctx->GeometryProgram.Enabled = GL_FALSE; - /* right now by default we don't have a geometry program */ - _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, - NULL); - - _mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL); - /* XXX probably move this stuff */ ctx->ATIFragmentShader.Enabled = GL_FALSE; ctx->ATIFragmentShader.Current = ctx->Shared->DefaultFragmentShader; @@ -122,8 +115,6 @@ _mesa_free_program_data(struct gl_context *ctx) _mesa_delete_program_cache(ctx, ctx->VertexProgram.Cache); _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL); _mesa_delete_shader_cache(ctx, ctx->FragmentProgram.Cache); - _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, NULL); - _mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL); /* XXX probably move this stuff */ if (ctx->ATIFragmentShader.Current) { @@ -153,9 +144,6 @@ _mesa_update_default_objects_program(struct gl_context *ctx) ctx->Shared->DefaultFragmentProgram); assert(ctx->FragmentProgram.Current); - _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, - ctx->Shared->DefaultGeometryProgram); - /* XXX probably move this stuff */ if (ctx->ATIFragmentShader.Current) { ctx->ATIFragmentShader.Current->RefCount--; @@ -340,7 +328,7 @@ _mesa_new_program(struct gl_context *ctx, GLenum target, GLuint id) 
CALLOC_STRUCT(gl_fragment_program), target, id ); break; - case MESA_GEOMETRY_PROGRAM: + case GL_GEOMETRY_PROGRAM_NV: prog = _mesa_init_geometry_program(ctx, CALLOC_STRUCT(gl_geometry_program), target, id); @@ -426,8 +414,8 @@ _mesa_reference_program_(struct gl_context *ctx, else if ((*ptr)->Target == GL_FRAGMENT_PROGRAM_ARB) assert(prog->Target == GL_FRAGMENT_PROGRAM_ARB || prog->Target == GL_FRAGMENT_PROGRAM_NV); - else if ((*ptr)->Target == MESA_GEOMETRY_PROGRAM) - assert(prog->Target == MESA_GEOMETRY_PROGRAM); + else if ((*ptr)->Target == GL_GEOMETRY_PROGRAM_NV) + assert(prog->Target == GL_GEOMETRY_PROGRAM_NV); } #endif @@ -439,7 +427,7 @@ _mesa_reference_program_(struct gl_context *ctx, printf("Program %p ID=%u Target=%s Refcount-- to %d\n", *ptr, (*ptr)->Id, ((*ptr)->Target == GL_VERTEX_PROGRAM_ARB ? "VP" : - ((*ptr)->Target == MESA_GEOMETRY_PROGRAM ? "GP" : "FP")), + ((*ptr)->Target == GL_GEOMETRY_PROGRAM_NV ? "GP" : "FP")), (*ptr)->RefCount - 1); #endif assert((*ptr)->RefCount > 0); @@ -464,7 +452,7 @@ _mesa_reference_program_(struct gl_context *ctx, printf("Program %p ID=%u Target=%s Refcount++ to %d\n", prog, prog->Id, (prog->Target == GL_VERTEX_PROGRAM_ARB ? "VP" : - (prog->Target == MESA_GEOMETRY_PROGRAM ? "GP" : "FP")), + (prog->Target == GL_GEOMETRY_PROGRAM_NV ? "GP" : "FP")), prog->RefCount); #endif /*mtx_unlock(&prog->Mutex);*/ @@ -554,7 +542,7 @@ _mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog) fpc->PixelCenterInteger = fp->PixelCenterInteger; } break; - case MESA_GEOMETRY_PROGRAM: + case GL_GEOMETRY_PROGRAM_NV: { const struct gl_geometry_program *gp = gl_geometry_program_const(prog); struct gl_geometry_program *gpc = gl_geometry_program(clone); diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y index 716b83d2d07..635f5d09d60 100644 --- a/src/mesa/program/program_parse.y +++ b/src/mesa/program/program_parse.y @@ -84,7 +84,7 @@ static void asm_instruction_set_operands(struct asm_instruction *inst, const struct prog_dst_register *dst, const struct asm_src_register *src0, const struct asm_src_register *src1, const struct asm_src_register *src2); -static struct asm_instruction *asm_instruction_ctor(gl_inst_opcode op, +static struct asm_instruction *asm_instruction_ctor(enum prog_opcode op, const struct prog_dst_register *dst, const struct asm_src_register *src0, const struct asm_src_register *src1, const struct asm_src_register *src2); @@ -139,7 +139,7 @@ static struct asm_instruction *asm_instruction_copy_ctor( gl_state_index state[STATE_LENGTH]; int negate; struct asm_vector vector; - gl_inst_opcode opcode; + enum prog_opcode opcode; struct { unsigned swz; @@ -2275,7 +2275,7 @@ asm_instruction_set_operands(struct asm_instruction *inst, struct asm_instruction * -asm_instruction_ctor(gl_inst_opcode op, +asm_instruction_ctor(enum prog_opcode op, const struct prog_dst_register *dst, const struct asm_src_register *src0, const struct asm_src_register *src1, @@ -2308,7 +2308,7 @@ asm_instruction_copy_ctor(const struct prog_instruction *base, inst->Base.Opcode = base->Opcode; inst->Base.CondUpdate = base->CondUpdate; inst->Base.CondDst = base->CondDst; - inst->Base.SaturateMode = base->SaturateMode; + inst->Base.Saturate = base->Saturate; inst->Base.Precision = base->Precision; asm_instruction_set_operands(inst, dst, src0, src1, src2); diff --git a/src/mesa/program/program_parse_extra.c b/src/mesa/program/program_parse_extra.c index a9e36404580..32b54afc57b 100644 --- a/src/mesa/program/program_parse_extra.c +++ 
b/src/mesa/program/program_parse_extra.c @@ -40,7 +40,7 @@ _mesa_parse_instruction_suffix(const struct asm_parser_state *state, { inst->CondUpdate = 0; inst->CondDst = 0; - inst->SaturateMode = SATURATE_OFF; + inst->Saturate = GL_FALSE; inst->Precision = FLOAT32; @@ -82,7 +82,7 @@ _mesa_parse_instruction_suffix(const struct asm_parser_state *state, */ if (state->mode == ARB_fragment) { if (strcmp(suffix, "_SAT") == 0) { - inst->SaturateMode = SATURATE_ZERO_ONE; + inst->Saturate = GL_TRUE; suffix += 4; } } diff --git a/src/mesa/program/programopt.c b/src/mesa/program/programopt.c index e82c68a5305..af78150d594 100644 --- a/src/mesa/program/programopt.c +++ b/src/mesa/program/programopt.c @@ -305,7 +305,7 @@ _mesa_append_fog_code(struct gl_context *ctx, /* change the instruction to write to colorTemp w/ clamping */ inst->DstReg.File = PROGRAM_TEMPORARY; inst->DstReg.Index = colorTemp; - inst->SaturateMode = saturate; + inst->Saturate = saturate; /* don't break (may be several writes to result.color) */ } inst++; @@ -331,7 +331,7 @@ _mesa_append_fog_code(struct gl_context *ctx, inst->SrcReg[2].File = PROGRAM_STATE_VAR; inst->SrcReg[2].Index = fogPRefOpt; inst->SrcReg[2].Swizzle = SWIZZLE_YYYY; - inst->SaturateMode = SATURATE_ZERO_ONE; + inst->Saturate = GL_TRUE; inst++; } else { @@ -374,7 +374,7 @@ _mesa_append_fog_code(struct gl_context *ctx, inst->SrcReg[0].Index = fogFactorTemp; inst->SrcReg[0].Negate = NEGATE_XYZW; inst->SrcReg[0].Swizzle = SWIZZLE_XXXX; - inst->SaturateMode = SATURATE_ZERO_ONE; + inst->Saturate = GL_TRUE; inst++; } /* LRP result.color.xyz, fogFactorTemp.xxxx, colorTemp, fogColorRef; */ diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c index b195c55b347..ae883a2535e 100644 --- a/src/mesa/state_tracker/st_atom_framebuffer.c +++ b/src/mesa/state_tracker/st_atom_framebuffer.c @@ -134,7 +134,10 @@ update_framebuffer_state( struct st_context *st ) else { strb = st_renderbuffer(fb->Attachment[BUFFER_STENCIL].Renderbuffer); if (strb) { - assert(strb->surface); + if (strb->is_rtt) { + /* rendering to a GL texture, may have to update surface */ + st_update_renderbuffer_surface(st, strb); + } pipe_surface_reference(&framebuffer->zsbuf, strb->surface); update_framebuffer_size(framebuffer, strb->surface); } diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 629f54f25de..ad8d2624fc9 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -189,7 +189,7 @@ update_gp( struct st_context *st ) } stgp = st_geometry_program(st->ctx->GeometryProgram._Current); - assert(stgp->Base.Base.Target == MESA_GEOMETRY_PROGRAM); + assert(stgp->Base.Base.Target == GL_GEOMETRY_PROGRAM_NV); memset(&key, 0, sizeof(key)); key.st = st; diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 2107ab16739..c881e194f70 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -452,6 +452,8 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, cso_save_fragment_shader(cso); cso_save_stream_outputs(cso); cso_save_vertex_shader(cso); + cso_save_tessctrl_shader(cso); + cso_save_tesseval_shader(cso); cso_save_geometry_shader(cso); cso_save_vertex_elements(cso); cso_save_aux_vertex_buffer_slot(cso); @@ -466,7 +468,9 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, /* vertex shader state: position + texcoord pass-through */ 
cso_set_vertex_shader_handle(cso, st->bitmap.vs); - /* geometry shader state: disabled */ + /* disable other shaders */ + cso_set_tessctrl_shader_handle(cso, NULL); + cso_set_tesseval_shader_handle(cso, NULL); cso_set_geometry_shader_handle(cso, NULL); /* user samplers, plus our bitmap sampler */ @@ -536,6 +540,8 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, cso_restore_viewport(cso); cso_restore_fragment_shader(cso); cso_restore_vertex_shader(cso); + cso_restore_tessctrl_shader(cso); + cso_restore_tesseval_shader(cso); cso_restore_geometry_shader(cso); cso_restore_vertex_elements(cso); cso_restore_aux_vertex_buffer_slot(cso); diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index bbaedd108f6..6d9371852c5 100644 --- a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -36,6 +36,7 @@ #include "st_context.h" #include "st_texture.h" +#include "st_cb_bitmap.h" #include "st_cb_blit.h" #include "st_cb_fbo.h" #include "st_atom.h" @@ -93,6 +94,9 @@ st_BlitFramebuffer(struct gl_context *ctx, st_validate_state(st); + /* Make sure bitmap rendering has landed in the framebuffers */ + st_flush_bitmap_cache(st); + clip.srcX0 = srcX0; clip.srcY0 = srcY0; clip.srcX1 = srcX1; diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index f10e9063ac7..137fac8a9a9 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -265,6 +265,8 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) cso_save_fragment_shader(st->cso_context); cso_save_stream_outputs(st->cso_context); cso_save_vertex_shader(st->cso_context); + cso_save_tessctrl_shader(st->cso_context); + cso_save_tesseval_shader(st->cso_context); cso_save_geometry_shader(st->cso_context); cso_save_vertex_elements(st->cso_context); cso_save_aux_vertex_buffer_slot(st->cso_context); @@ -347,6 +349,8 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) } set_fragment_shader(st); + cso_set_tessctrl_shader_handle(st->cso_context, NULL); + cso_set_tesseval_shader_handle(st->cso_context, NULL); if (num_layers > 1) set_vertex_shader_layered(st); @@ -371,6 +375,8 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) cso_restore_viewport(st->cso_context); cso_restore_fragment_shader(st->cso_context); cso_restore_vertex_shader(st->cso_context); + cso_restore_tessctrl_shader(st->cso_context); + cso_restore_tesseval_shader(st->cso_context); cso_restore_geometry_shader(st->cso_context); cso_restore_vertex_elements(st->cso_context); cso_restore_aux_vertex_buffer_slot(st->cso_context); diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 3edf31bad52..a6a98c83aa6 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -693,6 +693,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, cso_save_fragment_shader(cso); cso_save_stream_outputs(cso); cso_save_vertex_shader(cso); + cso_save_tessctrl_shader(cso); + cso_save_tesseval_shader(cso); cso_save_geometry_shader(cso); cso_save_vertex_elements(cso); cso_save_aux_vertex_buffer_slot(cso); @@ -746,7 +748,9 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, /* vertex shader state: position + texcoord pass-through */ cso_set_vertex_shader_handle(cso, driver_vp); - /* geometry shader state: disabled */ + /* disable other shaders */ + cso_set_tessctrl_shader_handle(cso, NULL); + 
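The bitmap, blit, clear, draw-pixels and draw-tex paths in this diff all extend the same save / override / restore discipline to the new tessellation stages, so a TCS/TES left bound by the application cannot run during the internal quad draw. A compact standalone sketch of that pattern; StateGuard, ShaderSlot and PipelineState are illustrative stand-ins, not the real cso_context API:

#include <array>
#include <cstdio>

enum ShaderSlot { VS, TCS, TES, GS, FS, NUM_SLOTS };

// Illustrative state block: one "handle" per shader stage (nullptr = stage off).
using PipelineState = std::array<const void *, NUM_SLOTS>;

// Save on construction, restore on destruction, so every exit path from a
// meta-op puts the application's shaders back.
struct StateGuard {
    PipelineState &state;
    PipelineState saved;
    explicit StateGuard(PipelineState &s) : state(s), saved(s) {}
    ~StateGuard() { state = saved; }
};

static void draw_internal_quad(PipelineState &state, const void *meta_vs,
                               const void *meta_fs)
{
    StateGuard guard(state);     // save VS/TCS/TES/GS/FS
    state[VS]  = meta_vs;        // bind the meta vertex shader
    state[FS]  = meta_fs;        // bind the meta fragment shader
    state[TCS] = nullptr;        // disable stages that must not run
    state[TES] = nullptr;
    state[GS]  = nullptr;
    std::puts("drawing meta quad with application shaders overridden");
    // guard's destructor restores the saved shaders here
}

In the state tracker the save/restore is explicit (paired cso_save_* / cso_restore_* calls) rather than scope-based, but the effect on the bound shader set is the same.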
cso_set_tesseval_shader_handle(cso, NULL); cso_set_geometry_shader_handle(cso, NULL); /* texture sampling state: */ @@ -816,6 +820,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, cso_restore_sampler_views(cso, PIPE_SHADER_FRAGMENT); cso_restore_fragment_shader(cso); cso_restore_vertex_shader(cso); + cso_restore_tessctrl_shader(cso); + cso_restore_tesseval_shader(cso); cso_restore_geometry_shader(cso); cso_restore_vertex_elements(cso); cso_restore_aux_vertex_buffer_slot(cso); diff --git a/src/mesa/state_tracker/st_cb_drawtex.c b/src/mesa/state_tracker/st_cb_drawtex.c index 1420b96e55a..2af4f6d4cf6 100644 --- a/src/mesa/state_tracker/st_cb_drawtex.c +++ b/src/mesa/state_tracker/st_cb_drawtex.c @@ -229,6 +229,8 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, cso_save_viewport(cso); cso_save_stream_outputs(cso); cso_save_vertex_shader(cso); + cso_save_tessctrl_shader(cso); + cso_save_tesseval_shader(cso); cso_save_geometry_shader(cso); cso_save_vertex_elements(cso); cso_save_aux_vertex_buffer_slot(cso); @@ -238,6 +240,8 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, semantic_names, semantic_indexes); cso_set_vertex_shader_handle(cso, vs); } + cso_set_tessctrl_shader_handle(cso, NULL); + cso_set_tesseval_shader_handle(cso, NULL); cso_set_geometry_shader_handle(cso, NULL); for (i = 0; i < numAttribs; i++) { @@ -279,6 +283,8 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, /* restore state */ cso_restore_viewport(cso); cso_restore_vertex_shader(cso); + cso_restore_tessctrl_shader(cso); + cso_restore_tesseval_shader(cso); cso_restore_geometry_shader(cso); cso_restore_vertex_elements(cso); cso_restore_aux_vertex_buffer_slot(cso); diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 296ea1e0d29..0399eef7204 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -842,7 +842,7 @@ void st_init_fbo_functions(struct dd_function_table *functions) functions->NewFramebuffer = st_new_framebuffer; functions->NewRenderbuffer = st_new_renderbuffer; functions->BindFramebuffer = st_bind_framebuffer; - functions->FramebufferRenderbuffer = _mesa_framebuffer_renderbuffer; + functions->FramebufferRenderbuffer = _mesa_FramebufferRenderbuffer_sw; functions->RenderTexture = st_render_texture; functions->FinishRenderTexture = st_finish_render_texture; functions->ValidateFramebuffer = st_validate_framebuffer; diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c index ca51eeee366..82affd2de3e 100644 --- a/src/mesa/state_tracker/st_cb_flush.c +++ b/src/mesa/state_tracker/st_cb_flush.c @@ -141,11 +141,44 @@ static void st_glFinish(struct gl_context *ctx) } -void st_init_flush_functions(struct dd_function_table *functions) +/** + * Query information about GPU resets observed by this context + * + * Called via \c dd_function_table::GetGraphicsResetStatus. 
+ */ +static GLenum +st_get_graphics_reset_status(struct gl_context *ctx) +{ + struct st_context *st = st_context(ctx); + enum pipe_reset_status status; + + status = st->pipe->get_device_reset_status(st->pipe); + + switch (status) { + case PIPE_NO_RESET: + return GL_NO_ERROR; + case PIPE_GUILTY_CONTEXT_RESET: + return GL_GUILTY_CONTEXT_RESET_ARB; + case PIPE_INNOCENT_CONTEXT_RESET: + return GL_INNOCENT_CONTEXT_RESET_ARB; + case PIPE_UNKNOWN_CONTEXT_RESET: + return GL_UNKNOWN_CONTEXT_RESET_ARB; + default: + assert(0); + return GL_NO_ERROR; + } +} + + +void st_init_flush_functions(struct pipe_screen *screen, + struct dd_function_table *functions) { functions->Flush = st_glFlush; functions->Finish = st_glFinish; + if (screen->get_param(screen, PIPE_CAP_DEVICE_RESET_STATUS_QUERY)) + functions->GetGraphicsResetStatus = st_get_graphics_reset_status; + /* Windows opengl32.dll calls glFinish prior to every swapbuffers. * This is unnecessary and degrades performance. Luckily we have some * scope to work around this, as the externally-visible behaviour of diff --git a/src/mesa/state_tracker/st_cb_flush.h b/src/mesa/state_tracker/st_cb_flush.h index 84ffc63ae13..f92dcd56b64 100644 --- a/src/mesa/state_tracker/st_cb_flush.h +++ b/src/mesa/state_tracker/st_cb_flush.h @@ -37,7 +37,8 @@ struct pipe_fence_handle; struct st_context; extern void -st_init_flush_functions(struct dd_function_table *functions); +st_init_flush_functions(struct pipe_screen *screen, + struct dd_function_table *functions); extern void st_flush(struct st_context *st, diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index c382d7d2ca3..6aa7d5796d9 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -65,7 +65,7 @@ st_bind_program(struct gl_context *ctx, GLenum target, struct gl_program *prog) case GL_FRAGMENT_PROGRAM_ARB: st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM; break; - case MESA_GEOMETRY_PROGRAM: + case GL_GEOMETRY_PROGRAM_NV: st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM; break; } @@ -105,7 +105,7 @@ st_new_program(struct gl_context *ctx, GLenum target, GLuint id) return _mesa_init_fragment_program(ctx, &prog->Base, target, id); } - case MESA_GEOMETRY_PROGRAM: { + case GL_GEOMETRY_PROGRAM_NV: { struct st_geometry_program *prog = ST_CALLOC_STRUCT(st_geometry_program); return _mesa_init_geometry_program(ctx, &prog->Base, target, id); } @@ -135,7 +135,7 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi); } break; - case MESA_GEOMETRY_PROGRAM: + case GL_GEOMETRY_PROGRAM_NV: { struct st_geometry_program *stgp = (struct st_geometry_program *) prog; @@ -198,7 +198,7 @@ st_program_string_notify( struct gl_context *ctx, if (st->fp == stfp) st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM; } - else if (target == MESA_GEOMETRY_PROGRAM) { + else if (target == GL_GEOMETRY_PROGRAM_NV) { struct st_geometry_program *stgp = (struct st_geometry_program *) prog; st_release_gp_variants(st, stgp); diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index bfb9c8406bd..ed9ed0f1b6c 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -321,7 +321,7 @@ struct st_context *st_create_context(gl_api api, struct pipe_context *pipe, struct st_context *st; memset(&funcs, 0, sizeof(funcs)); - st_init_driver_functions(&funcs); + st_init_driver_functions(pipe->screen, &funcs); ctx = _mesa_create_context(api, visual, shareCtx, &funcs); if (!ctx) { @@ 
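To summarise the robustness hook added above: the state tracker translates the pipe driver's reset status into the GL_ARB_robustness enums and installs the callback only when the screen reports the reset-status capability, so drivers without the query fall back to whatever core Mesa installs by default. A standalone sketch of that shape, with DeviceReset, GLenumish and DriverFuncs standing in for the gallium/Mesa types:

// Stand-ins for pipe_reset_status and the GL_ARB_robustness return values.
enum class DeviceReset { None, Guilty, Innocent, Unknown };
enum GLenumish { NO_ERROR_ = 0, GUILTY_RESET, INNOCENT_RESET, UNKNOWN_RESET };

static GLenumish map_reset_status(DeviceReset s)
{
    switch (s) {
    case DeviceReset::None:     return NO_ERROR_;
    case DeviceReset::Guilty:   return GUILTY_RESET;
    case DeviceReset::Innocent: return INNOCENT_RESET;
    case DeviceReset::Unknown:  return UNKNOWN_RESET;
    }
    return NO_ERROR_;   // unreachable; keeps compilers quiet
}

// The driver-function table only gets the hook if the screen can answer.
struct DriverFuncs {
    GLenumish (*GetGraphicsResetStatus)(DeviceReset) = nullptr;
};

static void init_flush_functions(bool screen_supports_reset_query,
                                 DriverFuncs &funcs)
{
    if (screen_supports_reset_query)
        funcs.GetGraphicsResetStatus = map_reset_status;
}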
-376,12 +376,6 @@ void st_destroy_context( struct st_context *st ) } pipe_surface_reference(&st->state.framebuffer.zsbuf, NULL); - pipe->set_index_buffer(pipe, NULL); - - for (i = 0; i < PIPE_SHADER_TYPES; i++) { - pipe->set_constant_buffer(pipe, i, 0, NULL); - } - _mesa_delete_program_cache(st->ctx, st->pixel_xfer.cache); _vbo_DestroyContext(st->ctx); @@ -401,7 +395,8 @@ void st_destroy_context( struct st_context *st ) } -void st_init_driver_functions(struct dd_function_table *functions) +void st_init_driver_functions(struct pipe_screen *screen, + struct dd_function_table *functions) { _mesa_init_shader_object_functions(functions); _mesa_init_sampler_object_functions(functions); @@ -429,7 +424,7 @@ void st_init_driver_functions(struct dd_function_table *functions) st_init_readpixels_functions(functions); st_init_texture_functions(functions); st_init_texture_barrier_functions(functions); - st_init_flush_functions(functions); + st_init_flush_functions(screen, functions); st_init_string_functions(functions); st_init_viewport_functions(functions); diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 8a9504bb7c1..dac5a4b9006 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -237,7 +237,8 @@ struct st_framebuffer }; -extern void st_init_driver_functions(struct dd_function_table *functions); +extern void st_init_driver_functions(struct pipe_screen *screen, + struct dd_function_table *functions); void st_invalidate_state(struct gl_context * ctx, GLuint new_state); diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 488f6ead201..8b43582c14b 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -141,7 +141,7 @@ check_uniforms(struct gl_context *ctx) if (shProg[j] == NULL || !shProg[j]->LinkStatus) continue; - for (i = 0; i < shProg[j]->NumUserUniformStorage; i++) { + for (i = 0; i < shProg[j]->NumUniformStorage; i++) { const struct gl_uniform_storage *u = &shProg[j]->UniformStorage[i]; if (!u->initialized) { _mesa_warning(ctx, diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 1fea8600a75..25e30c7deb2 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -57,11 +57,6 @@ (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) -/** - * Maximum number of arrays - */ -#define MAX_ARRAYS 256 - #define MAX_GLSL_TEXTURE_OFFSET 4 class st_src_reg; @@ -89,6 +84,7 @@ public: this->reladdr2 = NULL; this->has_index2 = false; this->double_reg2 = false; + this->array_id = 0; } st_src_reg(gl_register_file file, int index, int type) @@ -103,6 +99,7 @@ public: this->reladdr2 = NULL; this->has_index2 = false; this->double_reg2 = false; + this->array_id = 0; } st_src_reg(gl_register_file file, int index, int type, int index2D) @@ -117,6 +114,7 @@ public: this->reladdr2 = NULL; this->has_index2 = false; this->double_reg2 = false; + this->array_id = 0; } st_src_reg() @@ -131,6 +129,7 @@ public: this->reladdr2 = NULL; this->has_index2 = false; this->double_reg2 = false; + this->array_id = 0; } explicit st_src_reg(st_dst_reg reg); @@ -150,6 +149,7 @@ public: * currently used for input mapping only. 
*/ bool double_reg2; + unsigned array_id; }; class st_dst_reg { @@ -162,6 +162,7 @@ public: this->cond_mask = COND_TR; this->reladdr = NULL; this->type = type; + this->array_id = 0; } st_dst_reg(gl_register_file file, int writemask, int type) @@ -172,6 +173,7 @@ public: this->cond_mask = COND_TR; this->reladdr = NULL; this->type = type; + this->array_id = 0; } st_dst_reg() @@ -182,6 +184,7 @@ public: this->writemask = 0; this->cond_mask = COND_TR; this->reladdr = NULL; + this->array_id = 0; } explicit st_dst_reg(st_src_reg reg); @@ -193,6 +196,7 @@ public: int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ /** Register index should be offset by the integer in this reg. */ st_src_reg *reladdr; + unsigned array_id; }; st_src_reg::st_src_reg(st_dst_reg reg) @@ -207,6 +211,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) this->reladdr2 = NULL; this->has_index2 = false; this->double_reg2 = false; + this->array_id = reg.array_id; } st_dst_reg::st_dst_reg(st_src_reg reg) @@ -217,6 +222,7 @@ st_dst_reg::st_dst_reg(st_src_reg reg) this->writemask = WRITEMASK_XYZW; this->cond_mask = COND_TR; this->reladdr = reg.reladdr; + this->array_id = reg.array_id; } class glsl_to_tgsi_instruction : public exec_node { @@ -233,6 +239,7 @@ public: st_src_reg sampler; /**< sampler register */ int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */ int tex_target; /**< One of TEXTURE_*_INDEX */ + glsl_base_type tex_type; GLboolean tex_shadow; st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; @@ -244,8 +251,9 @@ public: class variable_storage : public exec_node { public: - variable_storage(ir_variable *var, gl_register_file file, int index) - : file(file), index(index), var(var) + variable_storage(ir_variable *var, gl_register_file file, int index, + unsigned array_id = 0) + : file(file), index(index), var(var), array_id(array_id) { /* empty */ } @@ -253,6 +261,7 @@ public: gl_register_file file; int index; ir_variable *var; /* variable that maps to this, if any */ + unsigned array_id; }; class immediate_storage : public exec_node { @@ -302,6 +311,15 @@ public: st_src_reg return_reg; }; +static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); +static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); + +struct array_decl { + unsigned mesa_index; + unsigned array_id; + unsigned array_size; +}; + struct glsl_to_tgsi_visitor : public ir_visitor { public: glsl_to_tgsi_visitor(); @@ -317,11 +335,19 @@ public: int next_temp; - unsigned array_sizes[MAX_ARRAYS]; + unsigned *array_sizes; + unsigned max_num_arrays; unsigned next_array; + struct array_decl input_arrays[PIPE_MAX_SHADER_INPUTS]; + unsigned num_input_arrays; + struct array_decl output_arrays[PIPE_MAX_SHADER_OUTPUTS]; + unsigned num_output_arrays; + int num_address_regs; int samplers_used; + glsl_base_type sampler_types[PIPE_MAX_SAMPLERS]; + int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */ bool indirect_addr_consts; int wpos_transform_const; @@ -372,6 +398,7 @@ public: virtual void visit(ir_if *); virtual void visit(ir_emit_vertex *); virtual void visit(ir_end_primitive *); + virtual void visit(ir_barrier *); /*@}*/ st_src_reg result; @@ -390,31 +417,19 @@ public: /** List of glsl_to_tgsi_instruction */ exec_list instructions; - glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op); - - glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_src_reg src0); - - glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned 
op, - st_dst_reg dst, st_dst_reg dst1, - st_src_reg src0); + glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst = undef_dst, + st_src_reg src0 = undef_src, + st_src_reg src1 = undef_src, + st_src_reg src2 = undef_src, + st_src_reg src3 = undef_src); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_src_reg src0, st_src_reg src1); - - glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, - st_src_reg src0, st_src_reg src1, st_src_reg src2); - - glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, - st_src_reg src0, st_src_reg src1, - st_src_reg src2, st_src_reg src3); - - glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_dst_reg dst1, - st_src_reg src0, st_src_reg src1, - st_src_reg src2, st_src_reg src3); + glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_dst_reg dst1, + st_src_reg src0 = undef_src, + st_src_reg src1 = undef_src, + st_src_reg src2 = undef_src, + st_src_reg src3 = undef_src); unsigned get_opcode(ir_instruction *ir, unsigned op, st_dst_reg dst, @@ -468,10 +483,6 @@ public: void *mem_ctx; }; -static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); - -static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); - static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0); static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1); static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2); @@ -526,10 +537,10 @@ num_inst_src_regs(unsigned opcode) } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_dst_reg dst1, - st_src_reg src0, st_src_reg src1, - st_src_reg src2, st_src_reg src3) +glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_dst_reg dst1, + st_src_reg src0, st_src_reg src1, + st_src_reg src2, st_src_reg src3) { glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); int num_reladdr = 0, i, j; @@ -571,6 +582,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, inst->src[3] = src3; inst->ir = ir; inst->dead_mask = 0; + /* default to float, for paths where this is not initialized + * (since 0==UINT which is likely wrong): + */ + inst->tex_type = GLSL_TYPE_FLOAT; inst->function = NULL; @@ -716,48 +731,12 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, - st_src_reg src0, st_src_reg src1, - st_src_reg src2, st_src_reg src3) +glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1, + st_src_reg src2, st_src_reg src3) { - return emit(ir, op, dst, undef_dst, src0, src1, src2, src3); -} - -glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_src_reg src0, - st_src_reg src1, st_src_reg src2) -{ - return emit(ir, op, dst, undef_dst, src0, src1, src2, undef_src); -} - -glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_src_reg src0, st_src_reg src1) -{ - return emit(ir, op, dst, undef_dst, src0, src1, undef_src, undef_src); -} - -glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, 
unsigned op, - st_dst_reg dst, st_src_reg src0) -{ - assert(dst.writemask != 0); - return emit(ir, op, dst, undef_dst, src0, undef_src, undef_src, undef_src); -} - -glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, - st_dst_reg dst, st_dst_reg dst1, st_src_reg src0) -{ - return emit(ir, op, dst, dst1, src0, undef_src, undef_src, undef_src); -} - -glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) -{ - return emit(ir, op, undef_dst, undef_dst, undef_src, undef_src, undef_src, undef_src); + return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, src3); } /** @@ -879,7 +858,7 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 }; - return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); + return emit_asm(ir, dot_opcodes[elements - 2], dst, src0, src1); } /** @@ -929,7 +908,7 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, src1_swiz, src1_swiz); dst.writemask = this_mask; - emit(ir, op, dst, src0, src1); + emit_asm(ir, op, dst, src0, src1); done_mask |= this_mask; } } @@ -958,7 +937,7 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, if (dst.index >= this->num_address_regs) this->num_address_regs = dst.index + 1; - emit(NULL, op, dst, src0); + emit_asm(NULL, op, dst, src0); } int @@ -1142,6 +1121,12 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) if (!options->EmitNoIndirectTemp && (type->is_array() || type->is_matrix())) { + if (next_array >= max_num_arrays) { + max_num_arrays += 32; + array_sizes = (unsigned*) + realloc(array_sizes, sizeof(array_sizes[0]) * max_num_arrays); + } + src.file = PROGRAM_ARRAY; src.index = next_array << 16 | 0x8000; array_sizes[next_array] = type_size(type); @@ -1242,7 +1227,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) */ st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT); src.swizzle = slots[i].swizzle; - emit(ir, TGSI_OPCODE_MOV, dst, src); + emit_asm(ir, TGSI_OPCODE_MOV, dst, src); /* even a float takes up a whole vec4 reg in a struct/array. 
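Most of the emit() → emit_asm() churn in this file is mechanical renaming, but the interesting part of the refactor is that the undef_src / undef_dst sentinels become default arguments, so one method replaces the stack of forwarding overloads being deleted here. A minimal standalone illustration of that shape (Emitter, Instr and Reg are placeholders, not the real glsl_to_tgsi types):

#include <deque>

struct Reg { int file = -1; int index = 0; };   // placeholder register
static const Reg undef_reg{};                   // sentinel: "no operand"

struct Instr { unsigned op; Reg dst; Reg src[3]; };

class Emitter {
public:
    // One entry point with defaulted operands stands in for the old
    // emit(op), emit(op,dst,src0), emit(op,dst,src0,src1), ... overloads.
    Instr *emit_asm(unsigned op,
                    Reg dst  = undef_reg,
                    Reg src0 = undef_reg,
                    Reg src1 = undef_reg,
                    Reg src2 = undef_reg)
    {
        instructions.push_back(Instr{op, dst, {src0, src1, src2}});
        return &instructions.back();
    }
private:
    std::deque<Instr> instructions;   // growth keeps returned pointers stable
};

Keeping the returned instruction pointer stable matters because, as the surrounding hunks show, callers patch it afterwards (saturate, tex_target, tex_type and so on); the real visitor gets the same guarantee from its allocator-backed exec_list.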
*/ dst.index++; } @@ -1261,11 +1246,11 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) void glsl_to_tgsi_visitor::visit(ir_loop *ir) { - emit(NULL, TGSI_OPCODE_BGNLOOP); + emit_asm(NULL, TGSI_OPCODE_BGNLOOP); visit_exec_list(&ir->body_instructions, this); - emit(NULL, TGSI_OPCODE_ENDLOOP); + emit_asm(NULL, TGSI_OPCODE_ENDLOOP); } void @@ -1273,10 +1258,10 @@ glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) { switch (ir->mode) { case ir_loop_jump::jump_break: - emit(NULL, TGSI_OPCODE_BRK); + emit_asm(NULL, TGSI_OPCODE_BRK); break; case ir_loop_jump::jump_continue: - emit(NULL, TGSI_OPCODE_CONT); + emit_asm(NULL, TGSI_OPCODE_CONT); break; } } @@ -1330,7 +1315,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) this->result = get_temp(ir->type); result_dst = st_dst_reg(this->result); result_dst.writemask = (1 << ir->type->vector_elements) - 1; - emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); + emit_asm(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); return true; } @@ -1370,7 +1355,7 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan b.negate = ~b.negate; this->result = get_temp(ir->type); - emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); + emit_asm(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); return true; } @@ -1388,7 +1373,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, if (*num_reladdr != 1) { st_src_reg temp = get_temp(glsl_type::vec4_type); - emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); + emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); *reg = temp; } @@ -1464,7 +1449,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: if (result_dst.type != GLSL_TYPE_FLOAT) - emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]); else { /* Previously 'SEQ dst, src, 0.0' was used for this. However, many * older GPUs implement SEQ using multiple instructions (i915 uses two @@ -1472,24 +1457,24 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) * 0.0 and 1.0, 1-x also implements !x. 
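The get_temp() hunk shown a little earlier drops the fixed MAX_ARRAYS table in favour of an array_sizes buffer grown on demand in chunks of 32 and freed in the destructor. A standalone sketch of that growth pattern (ArrayTable is a made-up wrapper; unlike this sketch, the hunk assigns realloc's result back without a check):

#include <cassert>
#include <cstdlib>

// array_sizes[i] holds the element count of temp array i; grow by 32 slots
// whenever the table fills up instead of imposing a hard cap.
struct ArrayTable {
    unsigned *sizes = nullptr;
    unsigned capacity = 0;
    unsigned count = 0;

    unsigned add(unsigned size)
    {
        if (count >= capacity) {
            capacity += 32;
            void *n = std::realloc(sizes, sizeof(sizes[0]) * capacity);
            assert(n && "out of memory");   // keep the sketch simple
            sizes = static_cast<unsigned *>(n);
        }
        sizes[count] = size;
        return count++;   // becomes the PROGRAM_ARRAY index
    }

    ~ArrayTable() { std::free(sizes); }
};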
*/ op[0].negate = ~op[0].negate; - emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); + emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); } break; case ir_unop_neg: if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT) - emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]); else if (result_dst.type == GLSL_TYPE_DOUBLE) - emit(ir, TGSI_OPCODE_DNEG, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]); else { op[0].negate = ~op[0].negate; result_src = op[0]; } break; case ir_unop_abs: - emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]); break; case ir_unop_sign: - emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]); break; case ir_unop_rcp: emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); @@ -1513,17 +1498,17 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_saturate: { glsl_to_tgsi_instruction *inst; - inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]); + inst = emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); inst->saturate = true; break; } case ir_unop_dFdx: case ir_unop_dFdx_coarse: - emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_DDX, result_dst, op[0]); break; case ir_unop_dFdx_fine: - emit(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]); break; case ir_unop_dFdy: case ir_unop_dFdy_coarse: @@ -1547,18 +1532,18 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) st_src_reg temp = get_temp(glsl_type::vec4_type); - emit(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]); - emit(ir, ir->operation == ir_unop_dFdy_fine ? + emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]); + emit_asm(ir, ir->operation == ir_unop_dFdy_fine ? TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp); break; } case ir_unop_frexp_sig: - emit(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]); break; case ir_unop_frexp_exp: - emit(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]); break; case ir_unop_noise: { @@ -1568,50 +1553,50 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) * place to do this is in the GL state tracker, not the poor * driver. 
*/ - emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); + emit_asm(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); break; } case ir_binop_add: - emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); break; case ir_binop_sub: - emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); break; case ir_binop_mul: - emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_div: if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE) assert(!"not reached: should be handled by ir_div_to_mul_rcp"); else - emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); break; case ir_binop_mod: if (result_dst.type == GLSL_TYPE_FLOAT) assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); else - emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); break; case ir_binop_less: - emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); break; case ir_binop_greater: - emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]); + emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]); break; case ir_binop_lequal: - emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]); + emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]); break; case ir_binop_gequal: - emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); break; case ir_binop_equal: - emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); break; case ir_binop_nequal: - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); break; case ir_binop_all_equal: /* "==" operator producing a scalar boolean. */ @@ -1625,7 +1610,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) st_dst_reg temp_dst = st_dst_reg(temp); st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); - emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]); /* Emit 1-3 AND operations to combine the SEQ results. */ switch (ir->operands[0]->type->vector_elements) { @@ -1635,24 +1620,24 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) temp_dst.writemask = WRITEMASK_Y; temp1.swizzle = SWIZZLE_YYYY; temp2.swizzle = SWIZZLE_ZZZZ; - emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); break; case 4: temp_dst.writemask = WRITEMASK_X; temp1.swizzle = SWIZZLE_XXXX; temp2.swizzle = SWIZZLE_YYYY; - emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); temp_dst.writemask = WRITEMASK_Y; temp1.swizzle = SWIZZLE_ZZZZ; temp2.swizzle = SWIZZLE_WWWW; - emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); } temp1.swizzle = SWIZZLE_XXXX; temp2.swizzle = SWIZZLE_YYYY; - emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2); } else { - emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); /* After the dot-product, the value will be an integer on the * range [0,4]. 
Zero becomes 1.0, and positive values become zero. @@ -1665,10 +1650,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) */ st_src_reg sge_src = result_src; sge_src.negate = ~sge_src.negate; - emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); } } else { - emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); } break; case ir_binop_any_nequal: @@ -1678,7 +1663,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) st_src_reg temp = get_temp(native_integers ? glsl_type::uvec4_type : glsl_type::vec4_type); - emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); if (native_integers) { st_dst_reg temp_dst = st_dst_reg(temp); @@ -1692,22 +1677,22 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) temp_dst.writemask = WRITEMASK_Y; temp1.swizzle = SWIZZLE_YYYY; temp2.swizzle = SWIZZLE_ZZZZ; - emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); break; case 4: temp_dst.writemask = WRITEMASK_X; temp1.swizzle = SWIZZLE_XXXX; temp2.swizzle = SWIZZLE_YYYY; - emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); temp_dst.writemask = WRITEMASK_Y; temp1.swizzle = SWIZZLE_ZZZZ; temp2.swizzle = SWIZZLE_WWWW; - emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); } temp1.swizzle = SWIZZLE_XXXX; temp2.swizzle = SWIZZLE_YYYY; - emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2); + emit_asm(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2); } else { /* After the dot-product, the value will be an integer on the * range [0,4]. Zero stays zero, and positive values become 1.0. 
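A note on the ir_binop_all_equal / ir_binop_any_nequal lowering above: the per-channel comparison vector has to be reduced to a single boolean, either with an AND/OR chain on native-integer hardware or, on the float path, with a dot product of the SNE results followed by a compare against zero. A small standalone version of the two reductions over plain arrays:

#include <cstddef>
#include <cstdint>

// Per-channel "a == b" produces all-ones (true) or zero per lane, like SEQ.
static uint32_t seq(uint32_t a, uint32_t b) { return a == b ? ~0u : 0u; }

// Native-integer path: AND the lanes together (the diff emits 1-3 ANDs).
static uint32_t all_equal_int(const uint32_t *a, const uint32_t *b, size_t n)
{
    uint32_t acc = ~0u;
    for (size_t i = 0; i < n; i++)
        acc &= seq(a[i], b[i]);
    return acc;
}

// Float path: SNE gives 0.0/1.0 per lane; the dot product with itself counts
// mismatches (0..n), and "all equal" is then just "that count is <= 0".
static float all_equal_float(const float *a, const float *b, size_t n)
{
    float mismatches = 0.0f;
    for (size_t i = 0; i < n; i++)
        mismatches += (a[i] != b[i]) ? 1.0f : 0.0f;   // SNE lane
    return (-mismatches >= 0.0f) ? 1.0f : 0.0f;       // SGE(-dot, 0)
}

For ir_binop_any_nequal the same two shapes apply with OR in place of AND and SLT in place of SGE, as the neighbouring hunks show.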
@@ -1726,11 +1711,11 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) */ st_src_reg slt_src = result_src; slt_src.negate = ~slt_src.negate; - emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); } } } else { - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); } break; @@ -1763,7 +1748,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) GET_SWZ(op0_swizzle, 3), GET_SWZ(op0_swizzle, 3), GET_SWZ(op0_swizzle, 3)); - emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); + emit_asm(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); accum = st_src_reg(result_dst); accum.swizzle = dst_swizzle; /* fallthrough */ @@ -1772,7 +1757,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) GET_SWZ(op0_swizzle, 2), GET_SWZ(op0_swizzle, 2), GET_SWZ(op0_swizzle, 2)); - emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); + emit_asm(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); accum = st_src_reg(result_dst); accum.swizzle = dst_swizzle; /* fallthrough */ @@ -1781,7 +1766,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) GET_SWZ(op0_swizzle, 1), GET_SWZ(op0_swizzle, 1), GET_SWZ(op0_swizzle, 1)); - emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); + emit_asm(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); break; default: assert(!"Unexpected vector size"); @@ -1807,11 +1792,11 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) */ st_src_reg slt_src = result_src; slt_src.negate = ~slt_src.negate; - emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); } else { /* Use SNE 0 if integers are being used as boolean values. */ - emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); } } break; @@ -1819,9 +1804,9 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_binop_logic_xor: if (native_integers) - emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); else - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); break; case ir_binop_logic_or: { @@ -1830,13 +1815,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) * instruction. */ assert(native_integers); - emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); } else { /* After the addition, the value will be an integer on the * range [0,2]. Zero stays zero, and positive values become 1.0. */ glsl_to_tgsi_instruction *add = - emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { /* The clamping to [0,1] can be done for free in the fragment * shader with a saturate if floats are being used as boolean values. @@ -1849,7 +1834,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) */ st_src_reg slt_src = result_src; slt_src.negate = ~slt_src.negate; - emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); } } break; @@ -1861,9 +1846,9 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) * actual AND opcode. 
*/ if (native_integers) - emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); else - emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_dot: @@ -1879,10 +1864,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) } else { /* sqrt(x) = x * rsq(x). */ emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); - emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); + emit_asm(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); /* For incoming channels <= 0, set the result to 0. */ op[0].negate = ~op[0].negate; - emit(ir, TGSI_OPCODE_CMP, result_dst, + emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], result_src, st_src_reg_for_float(0.0)); } break; @@ -1891,13 +1876,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_i2f: if (native_integers) { - emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_I2F, result_dst, op[0]); break; } /* fallthrough to next case otherwise */ case ir_unop_b2f: if (native_integers) { - emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0)); + emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0)); break; } /* fallthrough to next case otherwise */ @@ -1912,7 +1897,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) * GLSL requires that int(bool) return 1 for true and 0 for false. * This conversion is done with AND, but it could be done with NEG. */ - emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1)); + emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1)); } else { /* Booleans and integers are both stored as floats when native * integers are disabled. @@ -1922,15 +1907,15 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_f2i: if (native_integers) - emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_F2I, result_dst, op[0]); else - emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_f2u: if (native_integers) - emit(ir, TGSI_OPCODE_F2U, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_F2U, result_dst, op[0]); else - emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_bitcast_f2i: result_src = op[0]; @@ -1946,38 +1931,38 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) result_src.type = GLSL_TYPE_FLOAT; break; case ir_unop_f2b: - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); break; case ir_unop_d2b: - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0)); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0)); break; case ir_unop_i2b: if (native_integers) - emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0)); else - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); + emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); break; case ir_unop_trunc: - emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_ceil: - emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_CEIL, result_dst, op[0]); break; case ir_unop_floor: - emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_FLR, 
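One hunk above is not a pure rename: ir_unop_i2b switches from INEG to USNE against zero. The likely motivation is that int-to-bool conversion should mean "value != 0" and yield the canonical 0 / ~0 boolean; integer negation happens to be non-zero for non-zero inputs but does not produce that canonical encoding. A two-function standalone illustration:

#include <cstdint>

// i2b as USNE(x, 0): canonical boolean, ~0u for any non-zero input.
static uint32_t i2b_usne(int32_t x) { return x != 0 ? ~0u : 0u; }

// The old INEG-style lowering: non-zero for non-zero x, but e.g. x = 2
// yields 0xFFFFFFFE rather than the canonical ~0u boolean.
static uint32_t i2b_ineg(int32_t x) { return 0u - static_cast<uint32_t>(x); }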
result_dst, op[0]); break; case ir_unop_round_even: - emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_ROUND, result_dst, op[0]); break; case ir_unop_fract: - emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_FRC, result_dst, op[0]); break; case ir_binop_min: - emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); break; case ir_binop_max: - emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); break; case ir_binop_pow: emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); @@ -1985,37 +1970,37 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_bit_not: if (native_integers) { - emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]); break; } case ir_unop_u2f: if (native_integers) { - emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]); break; } case ir_binop_lshift: if (native_integers) { - emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]); break; } case ir_binop_rshift: if (native_integers) { - emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]); break; } case ir_binop_bit_and: if (native_integers) { - emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); break; } case ir_binop_bit_xor: if (native_integers) { - emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); break; } case ir_binop_bit_or: if (native_integers) { - emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); break; } @@ -2045,7 +2030,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) } else { /* Relative/variable index into constant buffer */ - emit(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1], + emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1], st_src_reg_for_int(4)); cbuf.reladdr = ralloc(mem_ctx, st_src_reg); memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg)); @@ -2078,88 +2063,88 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) const_offset % 16 / 4); if (ir->type->base_type == GLSL_TYPE_BOOL) { - emit(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0)); + emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0)); } else { - emit(ir, TGSI_OPCODE_MOV, result_dst, cbuf); + emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf); } break; } case ir_triop_lrp: /* note: we have to reorder the three args here */ - emit(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]); + emit_asm(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]); break; case ir_triop_csel: if (this->ctx->Const.NativeIntegers) - emit(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]); + emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]); else { op[0].negate = ~op[0].negate; - emit(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]); + emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]); } break; case ir_triop_bitfield_extract: - emit(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]); + emit_asm(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]); break; case ir_quadop_bitfield_insert: - emit(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]); + emit_asm(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]); 
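The constant-buffer addressing in the hunk above converts GLSL byte offsets into TGSI vec4 slots: a variable offset is shifted right by 4 at run time (one vec4 = 16 bytes), while a compile-time offset uses offset / 16 for the slot and (offset % 16) / 4 for the component. A standalone worked version of that arithmetic:

#include <cassert>

struct ConstBufAddr {
    unsigned slot;        // which vec4 in the buffer
    unsigned component;   // 0..3 = x..w
};

// offset is a byte offset into a buffer laid out as 16-byte vec4 slots.
static ConstBufAddr ubo_addr_from_byte_offset(unsigned offset)
{
    ConstBufAddr a;
    a.slot = offset >> 4;              // same as the USHR ..., 4 in the diff
    a.component = (offset % 16) / 4;   // 4-byte scalars within the slot
    return a;
}

// e.g. byte offset 36 -> slot 2, component .y
static void example()
{
    assert(ubo_addr_from_byte_offset(36).slot == 2);
    assert(ubo_addr_from_byte_offset(36).component == 1);
}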
break; case ir_unop_bitfield_reverse: - emit(ir, TGSI_OPCODE_BREV, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_BREV, result_dst, op[0]); break; case ir_unop_bit_count: - emit(ir, TGSI_OPCODE_POPC, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_POPC, result_dst, op[0]); break; case ir_unop_find_msb: - emit(ir, TGSI_OPCODE_IMSB, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_IMSB, result_dst, op[0]); break; case ir_unop_find_lsb: - emit(ir, TGSI_OPCODE_LSB, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_LSB, result_dst, op[0]); break; case ir_binop_imul_high: - emit(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]); break; case ir_triop_fma: /* In theory, MAD is incorrect here. */ if (have_fma) - emit(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]); + emit_asm(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]); else - emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]); + emit_asm(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]); break; case ir_unop_interpolate_at_centroid: - emit(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]); break; case ir_binop_interpolate_at_offset: - emit(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], op[1]); break; case ir_binop_interpolate_at_sample: - emit(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]); break; case ir_unop_d2f: - emit(ir, TGSI_OPCODE_D2F, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_D2F, result_dst, op[0]); break; case ir_unop_f2d: - emit(ir, TGSI_OPCODE_F2D, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_F2D, result_dst, op[0]); break; case ir_unop_d2i: - emit(ir, TGSI_OPCODE_D2I, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_D2I, result_dst, op[0]); break; case ir_unop_i2d: - emit(ir, TGSI_OPCODE_I2D, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_I2D, result_dst, op[0]); break; case ir_unop_d2u: - emit(ir, TGSI_OPCODE_D2U, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_D2U, result_dst, op[0]); break; case ir_unop_u2d: - emit(ir, TGSI_OPCODE_U2D, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_U2D, result_dst, op[0]); break; case ir_unop_unpack_double_2x32: case ir_unop_pack_double_2x32: - emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]); + emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); break; case ir_binop_ldexp: if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) { - emit(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]); + emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]); } else { assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()"); } @@ -2243,11 +2228,38 @@ glsl_to_tgsi_visitor::visit(ir_swizzle *ir) this->result = src; } +/* Test if the variable is an array. Note that geometry and + * tessellation shader inputs are outputs are always arrays (except + * for patch inputs), so only the array element type is considered. 
+ */ +static bool +is_inout_array(unsigned stage, ir_variable *var, bool *is_2d) +{ + const glsl_type *type = var->type; + + if ((stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) || + (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out)) + return false; + + *is_2d = false; + + if (stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) { + if (!var->type->is_array()) + return false; /* a system value probably */ + + type = var->type->fields.array; + *is_2d = true; + } + + return type->is_array() || type->is_matrix(); +} + void glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) { variable_storage *entry = find_variable_storage(ir->var); ir_variable *var = ir->var; + bool is_2d; if (!entry) { switch (var->data.mode) { @@ -2263,16 +2275,56 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) * user-defined varyings. */ assert(var->data.location != -1); - entry = new(mem_ctx) variable_storage(var, - PROGRAM_INPUT, - var->data.location); + + if (is_inout_array(shader->Stage, var, &is_2d)) { + struct array_decl *decl = &input_arrays[num_input_arrays]; + + decl->mesa_index = var->data.location; + decl->array_id = num_input_arrays + 1; + if (is_2d) + decl->array_size = type_size(var->type->fields.array); + else + decl->array_size = type_size(var->type); + num_input_arrays++; + + entry = new(mem_ctx) variable_storage(var, + PROGRAM_INPUT, + var->data.location, + decl->array_id); + } + else { + entry = new(mem_ctx) variable_storage(var, + PROGRAM_INPUT, + var->data.location); + } + this->variables.push_tail(entry); break; case ir_var_shader_out: assert(var->data.location != -1); - entry = new(mem_ctx) variable_storage(var, - PROGRAM_OUTPUT, - var->data.location - + var->data.index); + + if (is_inout_array(shader->Stage, var, &is_2d)) { + struct array_decl *decl = &output_arrays[num_output_arrays]; + + decl->mesa_index = var->data.location; + decl->array_id = num_output_arrays + 1; + if (is_2d) + decl->array_size = type_size(var->type->fields.array); + else + decl->array_size = type_size(var->type); + num_output_arrays++; + + entry = new(mem_ctx) variable_storage(var, + PROGRAM_OUTPUT, + var->data.location, + decl->array_id); + } + else { + entry = new(mem_ctx) variable_storage(var, + PROGRAM_OUTPUT, + var->data.location + + var->data.index); + } + this->variables.push_tail(entry); break; case ir_var_system_value: entry = new(mem_ctx) variable_storage(var, @@ -2296,10 +2348,43 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) } this->result = st_src_reg(entry->file, entry->index, var->type); + this->result.array_id = entry->array_id; if (!native_integers) this->result.type = GLSL_TYPE_FLOAT; } +static void +shrink_array_declarations(struct array_decl *arrays, unsigned count, + GLbitfield64 usage_mask) +{ + unsigned i, j; + + /* Fix array declarations by removing unused array elements at both ends + * of the arrays. For example, mat4[3] where only mat[1] is used. + */ + for (i = 0; i < count; i++) { + struct array_decl *decl = &arrays[i]; + + /* Shrink the beginning. */ + for (j = 0; j < decl->array_size; j++) { + if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) + break; + + decl->mesa_index++; + decl->array_size--; + j--; + } + + /* Shrink the end. 
*/ + for (j = decl->array_size-1; j >= 0; j--) { + if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) + break; + + decl->array_size--; + } + } +} + void glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) { @@ -2341,7 +2426,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) index_reg = get_temp(native_integers ? glsl_type::int_type : glsl_type::float_type); - emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), + emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), this->result, st_src_reg_for_type(index_reg.type, element_size)); } @@ -2352,7 +2437,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) st_src_reg accum_reg = get_temp(native_integers ? glsl_type::int_type : glsl_type::float_type); - emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), + emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), index_reg, *src.reladdr); index_reg = accum_reg; @@ -2589,16 +2674,16 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type * l_src.swizzle = swizzle_for_size(type->vector_elements); if (native_integers) { - emit(ir, TGSI_OPCODE_UCMP, *l, *cond, + emit_asm(ir, TGSI_OPCODE_UCMP, *l, *cond, cond_swap ? l_src : *r, cond_swap ? *r : l_src); } else { - emit(ir, TGSI_OPCODE_CMP, *l, *cond, + emit_asm(ir, TGSI_OPCODE_CMP, *l, *cond, cond_swap ? l_src : *r, cond_swap ? *r : l_src); } } else { - emit(ir, TGSI_OPCODE_MOV, *l, *r); + emit_asm(ir, TGSI_OPCODE_MOV, *l, *r); } l->index++; r->index++; @@ -2679,7 +2764,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) */ glsl_to_tgsi_instruction *inst, *new_inst; inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); - new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); + new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); new_inst->saturate = inst->saturate; inst->dead_mask = inst->dst[0].writemask; } else { @@ -2717,7 +2802,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) src = this->result; for (i = 0; i < (unsigned int)size; i++) { - emit(ir, TGSI_OPCODE_MOV, temp, src); + emit_asm(ir, TGSI_OPCODE_MOV, temp, src); src.index++; temp.index++; @@ -2739,7 +2824,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) ir->array_elements[i]->accept(this); src = this->result; for (int j = 0; j < size; j++) { - emit(ir, TGSI_OPCODE_MOV, temp, src); + emit_asm(ir, TGSI_OPCODE_MOV, temp, src); src.index++; temp.index++; @@ -2764,7 +2849,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) ir->type->vector_elements, GL_FLOAT, &src.swizzle); - emit(ir, TGSI_OPCODE_MOV, mat_column, src); + emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; } @@ -2889,7 +2974,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) l.cond_mask = COND_TR; for (i = 0; i < type_size(param->type); i++) { - emit(ir, TGSI_OPCODE_MOV, l, r); + emit_asm(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } @@ -2897,7 +2982,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) } /* Emit call instruction */ - call_inst = emit(ir, TGSI_OPCODE_CAL); + call_inst = emit_asm(ir, TGSI_OPCODE_CAL); call_inst->function = entry; /* Process out parameters. 
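shrink_array_declarations(), added a little above, trims each input/output array declaration down to the span of elements the shader actually references, using a usage bitmask: unused slots are dropped from the front (bumping mesa_index) and from the back (shrinking array_size). A standalone sketch of the same trimming over a plain 64-bit mask:

#include <cstdint>

struct ArrayDecl {
    unsigned first;   // counterpart of mesa_index
    unsigned size;    // number of consecutive slots declared
};

// Keep only the used span: e.g. a mat4[3] (12 slots) of which only the middle
// matrix is referenced shrinks from {first=0, size=12} to {first=4, size=4}.
// Assumes slot indices < 64, matching the BITFIELD64 mask in the diff.
static void shrink(ArrayDecl &d, uint64_t usage_mask)
{
    // Drop unused slots at the front.
    while (d.size && !(usage_mask & (UINT64_C(1) << d.first))) {
        d.first++;
        d.size--;
    }
    // Drop unused slots at the back.
    while (d.size && !(usage_mask & (UINT64_C(1) << (d.first + d.size - 1))))
        d.size--;
}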
*/ @@ -2922,7 +3007,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) st_dst_reg l = st_dst_reg(this->result); for (i = 0; i < type_size(param->type); i++) { - emit(ir, TGSI_OPCODE_MOV, l, r); + emit_asm(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } @@ -2965,7 +3050,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) coord = get_temp(glsl_type::vec4_type); coord_dst = st_dst_reg(coord); coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1; - emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); } if (ir->projector) { @@ -3074,7 +3159,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) if (opcode == TGSI_OPCODE_TEX) { /* Slot the projector in as the last component of the coord. */ coord_dst.writemask = WRITEMASK_W; - emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, projector); coord_dst.writemask = WRITEMASK_XYZW; opcode = TGSI_OPCODE_TXP; } else { @@ -3086,7 +3171,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) * projective divide now. */ coord_dst.writemask = WRITEMASK_W; - emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); + emit_asm(ir, TGSI_OPCODE_RCP, coord_dst, projector); /* In the case where we have to project the coordinates "by hand," * the shadow comparator value must also be projected. @@ -3105,14 +3190,14 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) assert(!sampler_type->sampler_array); tmp_dst.writemask = WRITEMASK_Z; - emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); + emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); tmp_dst.writemask = WRITEMASK_XY; - emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); + emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, coord); } coord_dst.writemask = WRITEMASK_XYZ; - emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); + emit_asm(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); coord_dst.writemask = WRITEMASK_XYZW; coord.swizzle = SWIZZLE_XYZW; @@ -3133,7 +3218,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) cube_sc = get_temp(glsl_type::float_type); cube_sc_dst = st_dst_reg(cube_sc); cube_sc_dst.writemask = WRITEMASK_X; - emit(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result); + emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result); cube_sc_dst.writemask = WRITEMASK_X; } else { @@ -3144,20 +3229,20 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } else { coord_dst.writemask = WRITEMASK_Z; } - emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); coord_dst.writemask = WRITEMASK_XYZW; } } if (ir->op == ir_txf_ms) { coord_dst.writemask = WRITEMASK_W; - emit(ir, TGSI_OPCODE_MOV, coord_dst, sample_index); + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample_index); coord_dst.writemask = WRITEMASK_XYZW; } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || opcode == TGSI_OPCODE_TXF) { /* TGSI stores LOD or LOD bias in the last channel of the coords. 
*/ coord_dst.writemask = WRITEMASK_W; - emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); coord_dst.writemask = WRITEMASK_XYZW; } @@ -3167,30 +3252,30 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } if (opcode == TGSI_OPCODE_TXD) - inst = emit(ir, opcode, result_dst, coord, dx, dy); + inst = emit_asm(ir, opcode, result_dst, coord, dx, dy); else if (opcode == TGSI_OPCODE_TXQ) { if (ir->op == ir_query_levels) { /* the level is stored in W */ - inst = emit(ir, opcode, st_dst_reg(levels_src), lod_info); + inst = emit_asm(ir, opcode, st_dst_reg(levels_src), lod_info); result_dst.writemask = WRITEMASK_X; levels_src.swizzle = SWIZZLE_WWWW; - emit(ir, TGSI_OPCODE_MOV, result_dst, levels_src); + emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src); } else - inst = emit(ir, opcode, result_dst, lod_info); + inst = emit_asm(ir, opcode, result_dst, lod_info); } else if (opcode == TGSI_OPCODE_TXF) { - inst = emit(ir, opcode, result_dst, coord); + inst = emit_asm(ir, opcode, result_dst, coord); } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) { - inst = emit(ir, opcode, result_dst, coord, lod_info); + inst = emit_asm(ir, opcode, result_dst, coord, lod_info); } else if (opcode == TGSI_OPCODE_TEX2) { - inst = emit(ir, opcode, result_dst, coord, cube_sc); + inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); } else if (opcode == TGSI_OPCODE_TG4) { if (is_cube_array && ir->shadow_comparitor) { - inst = emit(ir, opcode, result_dst, coord, cube_sc); + inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); } else { - inst = emit(ir, opcode, result_dst, coord, component); + inst = emit_asm(ir, opcode, result_dst, coord, component); } } else - inst = emit(ir, opcode, result_dst, coord); + inst = emit_asm(ir, opcode, result_dst, coord); if (ir->shadow_comparitor) inst->tex_shadow = GL_TRUE; @@ -3246,6 +3331,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) assert(!"Should not get here."); } + inst->tex_type = ir->type->base_type; + this->result = result_src; } @@ -3264,13 +3351,13 @@ glsl_to_tgsi_visitor::visit(ir_return *ir) l = st_dst_reg(current_function->return_reg); for (i = 0; i < type_size(current_function->sig->return_type); i++) { - emit(ir, TGSI_OPCODE_MOV, l, r); + emit_asm(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } } - emit(ir, TGSI_OPCODE_RET); + emit_asm(ir, TGSI_OPCODE_RET); } void @@ -3283,16 +3370,16 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir) /* Convert the bool condition to a float so we can negate. */ if (native_integers) { st_src_reg temp = get_temp(ir->condition->type); - emit(ir, TGSI_OPCODE_AND, st_dst_reg(temp), + emit_asm(ir, TGSI_OPCODE_AND, st_dst_reg(temp), condition, st_src_reg_for_float(1.0)); condition = temp; } condition.negate = ~condition.negate; - emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition); + emit_asm(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition); } else { /* unconditional kil */ - emit(ir, TGSI_OPCODE_KILL); + emit_asm(ir, TGSI_OPCODE_KILL); } } @@ -3307,18 +3394,18 @@ glsl_to_tgsi_visitor::visit(ir_if *ir) if_opcode = native_integers ? 
TGSI_OPCODE_UIF : TGSI_OPCODE_IF; - if_inst = emit(ir->condition, if_opcode, undef_dst, this->result); + if_inst = emit_asm(ir->condition, if_opcode, undef_dst, this->result); this->instructions.push_tail(if_inst); visit_exec_list(&ir->then_instructions, this); if (!ir->else_instructions.is_empty()) { - emit(ir->condition, TGSI_OPCODE_ELSE); + emit_asm(ir->condition, TGSI_OPCODE_ELSE); visit_exec_list(&ir->else_instructions, this); } - if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); + if_inst = emit_asm(ir->condition, TGSI_OPCODE_ENDIF); } @@ -3328,7 +3415,7 @@ glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir) assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV); ir->stream->accept(this); - emit(ir, TGSI_OPCODE_EMIT, undef_dst, this->result); + emit_asm(ir, TGSI_OPCODE_EMIT, undef_dst, this->result); } void @@ -3337,14 +3424,24 @@ glsl_to_tgsi_visitor::visit(ir_end_primitive *ir) assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV); ir->stream->accept(this); - emit(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result); + emit_asm(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result); +} + +void +glsl_to_tgsi_visitor::visit(ir_barrier *ir) +{ + unreachable("Not implemented!"); } glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() { result.file = PROGRAM_UNDEFINED; next_temp = 1; + array_sizes = NULL; + max_num_arrays = 0; next_array = 0; + num_input_arrays = 0; + num_output_arrays = 0; next_signature_id = 1; num_immediates = 0; current_function = NULL; @@ -3366,6 +3463,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() { + free(array_sizes); ralloc_free(mem_ctx); } @@ -3387,7 +3485,13 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) { if (is_tex_instruction(inst->op)) { for (int i = 0; i < inst->sampler_array_size; i++) { - v->samplers_used |= 1 << (inst->sampler.index + i); + unsigned idx = inst->sampler.index + i; + v->samplers_used |= 1 << idx; + + debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types)); + v->sampler_types[idx] = inst->tex_type; + v->sampler_targets[idx] = + st_translate_texture_target(inst->tex_target, inst->tex_shadow); if (inst->tex_shadow) { prog->ShadowSamplers |= 1 << (inst->sampler.index + i); @@ -3734,6 +3838,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) inst->src[r].index2D = first->src[0].index2D; inst->src[r].has_index2 = first->src[0].has_index2; inst->src[r].double_reg2 = first->src[0].double_reg2; + inst->src[r].array_id = first->src[0].array_id; int swizzle = 0; for (int i = 0; i < 4; i++) { @@ -4177,7 +4282,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type); src0 = v->get_temp(glsl_type::vec4_type); dst0 = st_dst_reg(src0); - inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord); inst->sampler_array_size = 1; inst->tex_target = TEXTURE_2D_INDEX; @@ -4201,7 +4306,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, /* MAD colorTemp, colorTemp, scale, bias; */ scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); - inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); + inst = v->emit_asm(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); } if (pixel_maps) { @@ -4209,6 +4314,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, st_dst_reg temp_dst = st_dst_reg(temp); assert(st->pixel_xfer.pixelmap_texture); + 
(void) st; /* With a little effort, we can do four pixel map look-ups with * two TEX instructions: @@ -4216,7 +4322,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ temp_dst.writemask = WRITEMASK_XY; /* write R,G */ - inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0); inst->sampler.index = 1; inst->sampler_array_size = 1; inst->tex_target = TEXTURE_2D_INDEX; @@ -4224,7 +4330,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ - inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0); inst->sampler.index = 1; inst->sampler_array_size = 1; inst->tex_target = TEXTURE_2D_INDEX; @@ -4233,7 +4339,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, v->samplers_used |= (1 << 1); /* MOV colorTemp, temp; */ - inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp); + inst = v->emit_asm(NULL, TGSI_OPCODE_MOV, dst0, temp); } /* Now copy the instructions from the original glsl_to_tgsi_visitor into the @@ -4256,7 +4362,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); } - newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]); + newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]); newinst->tex_target = inst->tex_target; newinst->sampler_array_size = inst->sampler_array_size; } @@ -4306,7 +4412,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type); src0 = v->get_temp(glsl_type::vec4_type); dst0 = st_dst_reg(src0); - inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord); inst->sampler.index = samplerIndex; inst->sampler_array_size = 1; inst->tex_target = TEXTURE_2D_INDEX; @@ -4319,7 +4425,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, src0.negate = NEGATE_XYZW; if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) src0.swizzle = SWIZZLE_XXXX; - inst = v->emit(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0); + inst = v->emit_asm(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0); /* Now copy the instructions from the original glsl_to_tgsi_visitor into the * new visitor. 
*/ @@ -4336,7 +4442,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); } - newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]); + newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]); newinst->tex_target = inst->tex_target; newinst->sampler_array_size = inst->sampler_array_size; } @@ -4362,7 +4468,8 @@ struct st_translate { unsigned temps_size; struct ureg_dst *temps; - struct ureg_dst arrays[MAX_ARRAYS]; + struct ureg_dst *arrays; + unsigned num_temp_arrays; struct ureg_src *constants; int num_constants; struct ureg_src *immediates; @@ -4373,7 +4480,9 @@ struct st_translate { struct ureg_src samplers[PIPE_MAX_SAMPLERS]; struct ureg_src systemValues[SYSTEM_VALUE_MAX]; struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; - unsigned array_sizes[MAX_ARRAYS]; + unsigned *array_sizes; + struct array_decl *input_arrays; + struct array_decl *output_arrays; const GLuint *inputMapping; const GLuint *outputMapping; @@ -4497,9 +4606,8 @@ emit_immediate(struct st_translate *t, * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. */ static struct ureg_dst -dst_register(struct st_translate *t, - gl_register_file file, - GLuint index) +dst_register(struct st_translate *t, gl_register_file file, unsigned index, + unsigned array_id) { unsigned array; @@ -4530,7 +4638,7 @@ dst_register(struct st_translate *t, case PROGRAM_ARRAY: array = index >> 16; - assert(array < ARRAY_SIZE(t->arrays)); + assert(array < t->num_temp_arrays); if (ureg_dst_is_undef(t->arrays[array])) t->arrays[array] = ureg_DECL_array_temporary( @@ -4540,16 +4648,25 @@ dst_register(struct st_translate *t, (int)(index & 0xFFFF) - 0x8000); case PROGRAM_OUTPUT: - if (t->procType == TGSI_PROCESSOR_VERTEX) - assert(index < VARYING_SLOT_MAX); - else if (t->procType == TGSI_PROCESSOR_FRAGMENT) - assert(index < FRAG_RESULT_MAX); - else - assert(index < VARYING_SLOT_MAX); + if (!array_id) { + if (t->procType == TGSI_PROCESSOR_FRAGMENT) + assert(index < FRAG_RESULT_MAX); + else + assert(index < VARYING_SLOT_MAX); - assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs)); + assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs)); + assert(t->outputs[t->outputMapping[index]].File != TGSI_FILE_NULL); + return t->outputs[t->outputMapping[index]]; + } + else { + struct array_decl *decl = &t->output_arrays[array_id-1]; + unsigned mesa_index = decl->mesa_index; + int slot = t->outputMapping[mesa_index]; - return t->outputs[t->outputMapping[index]]; + assert(slot != -1 && t->outputs[slot].File == TGSI_FILE_OUTPUT); + assert(t->outputs[slot].ArrayID == array_id); + return ureg_dst_array_offset(t->outputs[slot], index - mesa_index); + } case PROGRAM_ADDRESS: return t->address[index]; @@ -4575,7 +4692,8 @@ src_register(struct st_translate *t, const st_src_reg *reg) case PROGRAM_TEMPORARY: case PROGRAM_ARRAY: - return ureg_src(dst_register(t, reg->file, reg->index)); + case PROGRAM_OUTPUT: + return ureg_src(dst_register(t, reg->file, reg->index, reg->array_id)); case PROGRAM_UNIFORM: assert(reg->index >= 0); @@ -4598,12 +4716,20 @@ src_register(struct st_translate *t, const st_src_reg *reg) * map back to the original index and add the offset after * mapping. 
*/ index -= double_reg2; - assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs)); - return t->inputs[t->inputMapping[index] + double_reg2]; + if (!reg->array_id) { + assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs)); + assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL); + return t->inputs[t->inputMapping[index]]; + } + else { + struct array_decl *decl = &t->input_arrays[reg->array_id-1]; + unsigned mesa_index = decl->mesa_index; + int slot = t->inputMapping[mesa_index]; - case PROGRAM_OUTPUT: - assert(t->outputMapping[reg->index] < ARRAY_SIZE(t->outputs)); - return ureg_src(t->outputs[t->outputMapping[reg->index]]); /* not needed? */ + assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT); + assert(t->inputs[slot].ArrayID == reg->array_id); + return ureg_src_array_offset(t->inputs[slot], index - mesa_index); + } case PROGRAM_ADDRESS: return ureg_src(t->address[reg->index]); @@ -4626,9 +4752,8 @@ translate_dst(struct st_translate *t, const st_dst_reg *dst_reg, bool saturate, bool clamp_color) { - struct ureg_dst dst = dst_register(t, - dst_reg->file, - dst_reg->index); + struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index, + dst_reg->array_id); if (dst.File == TGSI_FILE_NULL) return dst; @@ -4738,7 +4863,7 @@ translate_tex_offset(struct st_translate *t, array = in_offset->index >> 16; assert(array >= 0); - assert(array < (int) ARRAY_SIZE(t->arrays)); + assert(array < (int)t->num_temp_arrays); dst = t->arrays[array]; offset.File = dst.File; @@ -5060,6 +5185,25 @@ emit_edgeflags(struct st_translate *t) ureg_MOV(ureg, edge_dst, edge_src); } +static bool +find_array(unsigned attr, struct array_decl *arrays, unsigned count, + unsigned *array_id, unsigned *array_size) +{ + unsigned i; + + for (i = 0; i < count; i++) { + struct array_decl *decl = &arrays[i]; + + if (attr == decl->mesa_index) { + *array_id = decl->array_id; + *array_size = decl->array_size; + assert(*array_size); + return true; + } + } + return false; +} + /** * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. * \param program the program to translate @@ -5089,12 +5233,14 @@ st_translate_program( const struct gl_program *proginfo, GLuint numInputs, const GLuint inputMapping[], + const GLuint inputSlotToAttr[], const ubyte inputSemanticName[], const ubyte inputSemanticIndex[], const GLuint interpMode[], const GLuint interpLocation[], GLuint numOutputs, const GLuint outputMapping[], + const GLuint outputSlotToAttr[], const ubyte outputSemanticName[], const ubyte outputSemanticIndex[], boolean passthrough_edgeflags, @@ -5132,25 +5278,101 @@ st_translate_program( goto out; } - memset(t, 0, sizeof *t); - t->procType = procType; t->inputMapping = inputMapping; t->outputMapping = outputMapping; t->ureg = ureg; + t->num_temp_arrays = program->next_array; + if (t->num_temp_arrays) + t->arrays = (struct ureg_dst*) + calloc(1, sizeof(t->arrays[0]) * t->num_temp_arrays); /* * Declare input attributes. */ - if (procType == TGSI_PROCESSOR_FRAGMENT) { + switch (procType) { + case TGSI_PROCESSOR_FRAGMENT: for (i = 0; i < numInputs; i++) { - t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg, - inputSemanticName[i], - inputSemanticIndex[i], - interpMode[i], 0, - interpLocation[i]); + unsigned array_id = 0; + unsigned array_size; + + if (find_array(inputSlotToAttr[i], program->input_arrays, + program->num_input_arrays, &array_id, &array_size)) { + /* We've found an array. Declare it so. 
*/ + t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg, + inputSemanticName[i], inputSemanticIndex[i], + interpMode[i], 0, interpLocation[i], + array_id, array_size); + i += array_size - 1; + } + else { + t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg, + inputSemanticName[i], inputSemanticIndex[i], + interpMode[i], 0, interpLocation[i], 0, 1); + } + } + break; + case TGSI_PROCESSOR_GEOMETRY: + for (i = 0; i < numInputs; i++) { + unsigned array_id = 0; + unsigned array_size; + + if (find_array(inputSlotToAttr[i], program->input_arrays, + program->num_input_arrays, &array_id, &array_size)) { + /* We've found an array. Declare it so. */ + t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i], + inputSemanticIndex[i], + array_id, array_size); + i += array_size - 1; + } + else { + t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i], + inputSemanticIndex[i], 0, 1); + } } + break; + case TGSI_PROCESSOR_VERTEX: + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_vs_input(ureg, i); + } + break; + default: + assert(0); + } + /* + * Declare output attributes. + */ + switch (procType) { + case TGSI_PROCESSOR_FRAGMENT: + break; + case TGSI_PROCESSOR_GEOMETRY: + case TGSI_PROCESSOR_VERTEX: + for (i = 0; i < numOutputs; i++) { + unsigned array_id = 0; + unsigned array_size; + + if (find_array(outputSlotToAttr[i], program->output_arrays, + program->num_output_arrays, &array_id, &array_size)) { + /* We've found an array. Declare it so. */ + t->outputs[i] = ureg_DECL_output_array(ureg, + outputSemanticName[i], + outputSemanticIndex[i], + array_id, array_size); + i += array_size - 1; + } + else { + t->outputs[i] = ureg_DECL_output(ureg, + outputSemanticName[i], + outputSemanticIndex[i]); + } + } + break; + default: + assert(0); + } + + if (procType == TGSI_PROCESSOR_FRAGMENT) { if (proginfo->InputsRead & VARYING_BIT_POS) { /* Must do this after setting up t->inputs. */ emit_wpos(st_context(ctx), t, proginfo, ureg, @@ -5160,9 +5382,6 @@ st_translate_program( if (proginfo->InputsRead & VARYING_BIT_FACE) emit_face_var(ctx, t); - /* - * Declare output attributes. - */ for (i = 0; i < numOutputs; i++) { switch (outputSemanticName[i]) { case TGSI_SEMANTIC_POSITION: @@ -5198,31 +5417,8 @@ st_translate_program( } } } - else if (procType == TGSI_PROCESSOR_GEOMETRY) { - for (i = 0; i < numInputs; i++) { - t->inputs[i] = ureg_DECL_gs_input(ureg, - i, - inputSemanticName[i], - inputSemanticIndex[i]); - } - + else if (procType == TGSI_PROCESSOR_VERTEX) { for (i = 0; i < numOutputs; i++) { - t->outputs[i] = ureg_DECL_output(ureg, - outputSemanticName[i], - outputSemanticIndex[i]); - } - } - else { - assert(procType == TGSI_PROCESSOR_VERTEX); - - for (i = 0; i < numInputs; i++) { - t->inputs[i] = ureg_DECL_vs_input(ureg, i); - } - - for (i = 0; i < numOutputs; i++) { - t->outputs[i] = ureg_DECL_output(ureg, - outputSemanticName[i], - outputSemanticIndex[i]); if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) { /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */ ureg_MOV(ureg, @@ -5277,9 +5473,9 @@ st_translate_program( } } - /* Copy over array sizes - */ - memcpy(t->array_sizes, program->array_sizes, sizeof(unsigned) * program->next_array); + t->array_sizes = program->array_sizes; + t->input_arrays = program->input_arrays; + t->output_arrays = program->output_arrays; /* Emit constants and uniforms. TGSI uses a single index space for these, * so we put all the translated regs in t->constants. 
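For reference, the array lookup used by the declaration loops above is a plain linear search over the shader's recorded arrays. Below is a minimal standalone sketch of that pattern: the struct array_decl fields mirror how the patch uses them, the slot-to-attribute table and array contents are made-up example data, and a printf stands in for the real ureg_DECL_* call, which needs a live ureg context.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct array_decl {
   unsigned mesa_index;   /* first attribute/varying slot the array covers */
   unsigned array_id;     /* 1-based array id handed to TGSI */
   unsigned array_size;   /* number of consecutive slots */
};

/* Same shape as the find_array() helper added by the patch. */
static bool
find_array(unsigned attr, const struct array_decl *arrays, unsigned count,
           unsigned *array_id, unsigned *array_size)
{
   for (unsigned i = 0; i < count; i++) {
      if (attr == arrays[i].mesa_index) {
         *array_id = arrays[i].array_id;
         *array_size = arrays[i].array_size;
         assert(*array_size);
         return true;
      }
   }
   return false;
}

int main(void)
{
   /* Hypothetical mapping: slots 4..7 belong to one declared array. */
   const unsigned slot_to_attr[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
   const struct array_decl arrays[] = { { 4, 1, 4 } };

   for (unsigned i = 0; i < 8; i++) {
      unsigned array_id = 0, array_size;

      if (find_array(slot_to_attr[i], arrays, 1, &array_id, &array_size)) {
         /* One declaration covers array_size slots, so skip the rest. */
         printf("slot %u: declare array id %u, size %u\n",
                i, array_id, array_size);
         i += array_size - 1;
      } else {
         printf("slot %u: declare single slot\n", i);
      }
   }
   return 0;
}

The i += array_size - 1 step is the same trick the patch uses in st_translate_program(): one ureg declaration covers every slot of the array, so the per-slot loop jumps past the slots it already handled.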
@@ -5355,7 +5551,26 @@ st_translate_program( /* texture samplers */ for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) { if (program->samplers_used & (1 << i)) { + unsigned type; + t->samplers[i] = ureg_DECL_sampler(ureg, i); + + switch (program->sampler_types[i]) { + case GLSL_TYPE_INT: + type = TGSI_RETURN_TYPE_SINT; + break; + case GLSL_TYPE_UINT: + type = TGSI_RETURN_TYPE_UINT; + break; + case GLSL_TYPE_FLOAT: + type = TGSI_RETURN_TYPE_FLOAT; + break; + default: + unreachable("not reached"); + } + + ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i], + type, type, type, type ); } } @@ -5375,6 +5590,7 @@ st_translate_program( out: if (t) { + free(t->arrays); free(t->temps); free(t->insn); free(t->labels); @@ -5470,7 +5686,7 @@ get_mesa_program(struct gl_context *ctx, if (!entry->bgn_inst) { v->current_function = entry; - entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB); + entry->bgn_inst = v->emit_asm(NULL, TGSI_OPCODE_BGNSUB); entry->bgn_inst->function = entry; visit_exec_list(&entry->sig->body, v); @@ -5478,10 +5694,10 @@ get_mesa_program(struct gl_context *ctx, glsl_to_tgsi_instruction *last; last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); if (last->op != TGSI_OPCODE_RET) - v->emit(NULL, TGSI_OPCODE_RET); + v->emit_asm(NULL, TGSI_OPCODE_RET); glsl_to_tgsi_instruction *end; - end = v->emit(NULL, TGSI_OPCODE_ENDSUB); + end = v->emit_asm(NULL, TGSI_OPCODE_ENDSUB); end->function = entry; progress = GL_TRUE; @@ -5513,7 +5729,7 @@ get_mesa_program(struct gl_context *ctx, v->renumber_registers(); /* Write the END instruction. */ - v->emit(NULL, TGSI_OPCODE_END); + v->emit_asm(NULL, TGSI_OPCODE_END); if (ctx->_Shader->Flags & GLSL_DUMP) { _mesa_log("\n"); @@ -5528,6 +5744,10 @@ get_mesa_program(struct gl_context *ctx, prog->NumInstructions = 0; do_set_program_inouts(shader->ir, prog, shader->Stage); + shrink_array_declarations(v->input_arrays, v->num_input_arrays, + prog->InputsRead); + shrink_array_declarations(v->output_arrays, v->num_output_arrays, + prog->OutputsWritten); count_resources(v, prog); /* This must be done before the uniform storage is associated. 
*/ @@ -5549,6 +5769,7 @@ get_mesa_program(struct gl_context *ctx, */ _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters); if (!shader_program->LinkStatus) { + free_glsl_to_tgsi_visitor(v); return NULL; } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h index 2cb80bcf961..4af747fa9de 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.h +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -43,12 +43,14 @@ enum pipe_error st_translate_program( const struct gl_program *proginfo, GLuint numInputs, const GLuint inputMapping[], + const GLuint inputSlotToAttr[], const ubyte inputSemanticName[], const ubyte inputSemanticIndex[], const GLuint interpMode[], const GLuint interpLocation[], GLuint numOutputs, const GLuint outputMapping[], + const GLuint outputSlotToAttr[], const ubyte outputSemanticName[], const ubyte outputSemanticIndex[], boolean passthrough_edgeflags, diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index 840f76a1307..a2dee6298fa 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -680,6 +680,10 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi, if (attribs->flags & ST_CONTEXT_FLAG_FORWARD_COMPATIBLE) st->ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT; + if (attribs->flags & ST_CONTEXT_FLAG_ROBUST_ACCESS) + st->ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB; + if (attribs->flags & ST_CONTEXT_FLAG_RESET_NOTIFICATION_ENABLED) + st->ctx->Const.ResetStrategy = GL_LOSE_CONTEXT_ON_RESET_ARB; /* need to perform version check */ if (attribs->major > 1 || attribs->minor > 0) { @@ -920,8 +924,7 @@ static unsigned get_version(struct pipe_screen *screen, struct gl_extensions extensions = {0}; GLuint version; - if ((api == API_OPENGL_COMPAT || api == API_OPENGL_CORE) && - _mesa_override_gl_version_contextless(&consts, &api, &version)) { + if (_mesa_override_gl_version_contextless(&consts, &api, &version)) { return version; } diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 98d525c86c2..896e239ee68 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -665,7 +665,7 @@ compile_instruction( if (num_dst) dst[0] = translate_dst( t, &inst->DstReg, - inst->SaturateMode, + inst->Saturate, clamp_dst_color_output); for (i = 0; i < num_src; i++) @@ -1095,10 +1095,9 @@ st_translate_mesa_program( } else if (procType == TGSI_PROCESSOR_GEOMETRY) { for (i = 0; i < numInputs; i++) { - t->inputs[i] = ureg_DECL_gs_input(ureg, - i, - inputSemanticName[i], - inputSemanticIndex[i]); + t->inputs[i] = ureg_DECL_input(ureg, + inputSemanticName[i], + inputSemanticIndex[i], 0, 1); } for (i = 0; i < numOutputs; i++) { diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index a9110d3c674..fa792bc349b 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -215,6 +215,7 @@ st_prepare_vertex_program(struct gl_context *ctx, unsigned slot = stvp->num_outputs++; stvp->result_to_output[attr] = slot; + stvp->output_slot_to_attr[slot] = attr; switch (attr) { case VARYING_SLOT_POS: @@ -285,7 +286,8 @@ st_prepare_vertex_program(struct gl_context *ctx, /* fall through */ case VARYING_SLOT_VAR0: default: - assert(attr < VARYING_SLOT_MAX); + assert(attr >= VARYING_SLOT_VAR0 || + (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7)); 
stvp->output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; stvp->output_semantic_index[slot] = st_get_generic_varying_index(st, attr); @@ -321,7 +323,7 @@ st_translate_vertex_program(struct st_context *st, _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); } - ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); + ureg = ureg_create_with_screen(TGSI_PROCESSOR_VERTEX, st->pipe->screen); if (ureg == NULL) { free(vpv); return NULL; @@ -351,6 +353,7 @@ st_translate_vertex_program(struct st_context *st, /* inputs */ vpv->num_inputs, stvp->input_to_index, + NULL, /* inputSlotToAttr */ NULL, /* input semantic name */ NULL, /* input semantic index */ NULL, /* interp mode */ @@ -358,6 +361,7 @@ st_translate_vertex_program(struct st_context *st, /* outputs */ num_outputs, stvp->result_to_output, + stvp->output_slot_to_attr, stvp->output_semantic_name, stvp->output_semantic_index, key->passthrough_edgeflags, @@ -482,6 +486,7 @@ st_translate_fragment_program(struct st_context *st, GLuint outputMapping[FRAG_RESULT_MAX]; GLuint inputMapping[VARYING_SLOT_MAX]; + GLuint inputSlotToAttr[VARYING_SLOT_MAX]; GLuint interpMode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */ GLuint interpLocation[PIPE_MAX_SHADER_INPUTS]; GLuint attr; @@ -502,6 +507,7 @@ st_translate_fragment_program(struct st_context *st, return NULL; assert(!(key->bitmap && key->drawpixels)); + memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr)); if (key->bitmap) { /* glBitmap drawing */ @@ -543,6 +549,7 @@ st_translate_fragment_program(struct st_context *st, const GLuint slot = fs_num_inputs++; inputMapping[attr] = slot; + inputSlotToAttr[slot] = attr; if (stfp->Base.IsCentroid & BITFIELD64_BIT(attr)) interpLocation[slot] = TGSI_INTERPOLATE_LOC_CENTROID; else if (stfp->Base.IsSample & BITFIELD64_BIT(attr)) @@ -657,7 +664,8 @@ st_translate_fragment_program(struct st_context *st, * consumed for the TEXi varyings, and we can base the locations of * the user varyings on VAR0. Otherwise, we use TEX0 as base index. 
*/ - assert(attr >= VARYING_SLOT_TEX0); + assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC || + (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7)); input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; input_semantic_index[slot] = st_get_generic_varying_index(st, attr); if (attr == VARYING_SLOT_PNTC) @@ -732,7 +740,7 @@ st_translate_fragment_program(struct st_context *st, } } - ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); + ureg = ureg_create_with_screen(TGSI_PROCESSOR_FRAGMENT, st->pipe->screen); if (ureg == NULL) { free(variant); return NULL; @@ -778,6 +786,7 @@ st_translate_fragment_program(struct st_context *st, /* inputs */ fs_num_inputs, inputMapping, + inputSlotToAttr, input_semantic_name, input_semantic_index, interpMode, @@ -785,6 +794,7 @@ st_translate_fragment_program(struct st_context *st, /* outputs */ fs_num_outputs, outputMapping, + NULL, fs_output_semantic_name, fs_output_semantic_index, FALSE, key->clamp_color ); @@ -867,7 +877,9 @@ st_translate_geometry_program(struct st_context *st, struct st_geometry_program *stgp, const struct st_gp_variant_key *key) { + GLuint inputSlotToAttr[VARYING_SLOT_MAX]; GLuint inputMapping[VARYING_SLOT_MAX]; + GLuint outputSlotToAttr[VARYING_SLOT_MAX]; GLuint outputMapping[VARYING_SLOT_MAX]; struct pipe_context *pipe = st->pipe; GLuint attr; @@ -890,13 +902,15 @@ st_translate_geometry_program(struct st_context *st, if (!gpv) return NULL; - ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY); + ureg = ureg_create_with_screen(TGSI_PROCESSOR_GEOMETRY, st->pipe->screen); if (ureg == NULL) { free(gpv); return NULL; } + memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr)); memset(inputMapping, 0, sizeof(inputMapping)); + memset(outputSlotToAttr, 0, sizeof(outputSlotToAttr)); memset(outputMapping, 0, sizeof(outputMapping)); /* @@ -907,6 +921,7 @@ st_translate_geometry_program(struct st_context *st, const GLuint slot = gs_num_inputs++; inputMapping[attr] = slot; + inputSlotToAttr[slot] = attr; switch (attr) { case VARYING_SLOT_PRIMITIVE_ID: @@ -985,6 +1000,7 @@ st_translate_geometry_program(struct st_context *st, GLuint slot = gs_num_outputs++; outputMapping[attr] = slot; + outputSlotToAttr[slot] = attr; switch (attr) { case VARYING_SLOT_POS: @@ -1080,6 +1096,7 @@ st_translate_geometry_program(struct st_context *st, /* inputs */ gs_num_inputs, inputMapping, + inputSlotToAttr, input_semantic_name, input_semantic_index, NULL, @@ -1087,6 +1104,7 @@ st_translate_geometry_program(struct st_context *st, /* outputs */ gs_num_outputs, outputMapping, + outputSlotToAttr, gs_output_semantic_name, gs_output_semantic_index, FALSE, @@ -1201,7 +1219,7 @@ destroy_program_variants(struct st_context *st, struct gl_program *program) } } break; - case MESA_GEOMETRY_PROGRAM: + case GL_GEOMETRY_PROGRAM_NV: { struct st_geometry_program *stgp = (struct st_geometry_program *) program; diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index a2c56062d6e..bb77eb6ed65 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -163,6 +163,7 @@ struct st_vertex_program /** Maps VARYING_SLOT_x to slot */ GLuint result_to_output[VARYING_SLOT_MAX]; + GLuint output_slot_to_attr[VARYING_SLOT_MAX]; ubyte output_semantic_name[VARYING_SLOT_MAX]; ubyte output_semantic_index[VARYING_SLOT_MAX]; GLuint num_outputs; diff --git a/src/mesa/swrast/s_texrender.c b/src/mesa/swrast/s_texrender.c index fa853c9197f..4e41b3b72a8 100644 --- a/src/mesa/swrast/s_texrender.c +++ b/src/mesa/swrast/s_texrender.c @@ -72,7 +72,7 
@@ update_wrapper(struct gl_context *ctx, struct gl_renderbuffer_attachment *att) * \param fb the framebuffer object the texture is being bound to * \param att the fb attachment point of the texture * - * \sa _mesa_framebuffer_renderbuffer + * \sa _mesa_FramebufferRenderbuffer_sw */ void _swrast_render_texture(struct gl_context *ctx, diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c index 5b9dd54d75a..bc77ba8bf95 100644 --- a/src/mesa/tnl/t_context.c +++ b/src/mesa/tnl/t_context.c @@ -36,6 +36,7 @@ #include "math/m_xform.h" #include "main/state.h" #include "main/viewport.h" +#include "util/simple_list.h" #include "tnl.h" #include "t_context.h" diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 3ea775c0e4a..72b8206ec23 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -1817,9 +1817,12 @@ vbo_initialize_exec_dispatch(const struct gl_context *ctx, SET_DrawElementsInstancedBaseVertexBaseInstance(exec, vbo_exec_DrawElementsInstancedBaseVertexBaseInstance); } - if (ctx->API == API_OPENGL_CORE) { + if (ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx)) { SET_DrawArraysIndirect(exec, vbo_exec_DrawArraysIndirect); SET_DrawElementsIndirect(exec, vbo_exec_DrawElementsIndirect); + } + + if (ctx->API == API_OPENGL_CORE) { SET_MultiDrawArraysIndirect(exec, vbo_exec_MultiDrawArraysIndirect); SET_MultiDrawElementsIndirect(exec, vbo_exec_MultiDrawElementsIndirect); } |
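The sampler changes in st_glsl_to_tgsi.cpp above record each sampler's GLSL base type and texture target in count_resources() and later turn the type into a TGSI return type when declaring the sampler view. A minimal standalone sketch of that mapping follows; the enums are local stand-ins with arbitrary values so the snippet compiles without Mesa's headers, the helper name sampler_return_type is illustrative, and only the case labels mirror the switch added by the patch.

#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for the Mesa/Gallium enums referenced in the patch. */
enum glsl_base_type { GLSL_TYPE_UINT, GLSL_TYPE_INT, GLSL_TYPE_FLOAT };
enum tgsi_return_type {
   TGSI_RETURN_TYPE_SINT, TGSI_RETURN_TYPE_UINT, TGSI_RETURN_TYPE_FLOAT
};

/* Integer samplers become SINT/UINT views; float samplers become FLOAT.
 * The patch hits unreachable("not reached") for anything else. */
static enum tgsi_return_type
sampler_return_type(enum glsl_base_type t)
{
   switch (t) {
   case GLSL_TYPE_INT:   return TGSI_RETURN_TYPE_SINT;
   case GLSL_TYPE_UINT:  return TGSI_RETURN_TYPE_UINT;
   case GLSL_TYPE_FLOAT: return TGSI_RETURN_TYPE_FLOAT;
   default:              abort();
   }
}

int main(void)
{
   printf("int sampler   -> %d\n", sampler_return_type(GLSL_TYPE_INT));
   printf("uint sampler  -> %d\n", sampler_return_type(GLSL_TYPE_UINT));
   printf("float sampler -> %d\n", sampler_return_type(GLSL_TYPE_FLOAT));
   return 0;
}

In the actual hunk the resulting type is passed for all four components of ureg_DECL_sampler_view(), alongside the per-sampler texture target recorded earlier.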