Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--  src/mesa/drivers/SConscript | 3
-rw-r--r--  src/mesa/drivers/common/driverfuncs.c | 2
-rw-r--r--  src/mesa/drivers/common/meta.c | 3
-rw-r--r--  src/mesa/drivers/common/meta_blit.c | 6
-rw-r--r--  src/mesa/drivers/common/meta_tex_subimage.c | 12
-rw-r--r--  src/mesa/drivers/dri/Makefile.am | 1
-rw-r--r--  src/mesa/drivers/dri/common/Android.mk | 8
-rw-r--r--  src/mesa/drivers/dri/common/Makefile.am | 6
-rw-r--r--  src/mesa/drivers/dri/common/Makefile.sources | 4
-rw-r--r--  src/mesa/drivers/dri/common/SConscript | 2
-rw-r--r--  src/mesa/drivers/dri/common/dri_util.c | 22
-rw-r--r--  src/mesa/drivers/dri/i915/i830_vtbl.c | 4
-rw-r--r--  src/mesa/drivers/dri/i915/i915_fragprog.c | 2
-rw-r--r--  src/mesa/drivers/dri/i915/i915_vtbl.c | 4
-rw-r--r--  src/mesa/drivers/dri/i915/intel_fbo.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/Makefile.am | 1
-rw-r--r--  src/mesa/drivers/dri/i965/Makefile.sources | 5
-rw-r--r--  src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 3
-rw-r--r--  src/mesa/drivers/dri/i965/brw_cfg.cpp | 12
-rw-r--r--  src/mesa/drivers/dri/i965/brw_cfg.h | 4
-rw-r--r--  src/mesa/drivers/dri/i965/brw_clear.c | 5
-rw-r--r--  src/mesa/drivers/dri/i965/brw_clip_state.c | 9
-rw-r--r--  src/mesa/drivers/dri/i965/brw_compute.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/brw_conditional_render.c | 161
-rw-r--r--  src/mesa/drivers/dri/i965/brw_context.c | 75
-rw-r--r--  src/mesa/drivers/dri/i965/brw_context.h | 64
-rw-r--r--  src/mesa/drivers/dri/i965/brw_cs.cpp | 25
-rw-r--r--  src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp | 6
-rw-r--r--  src/mesa/drivers/dri/i965/brw_dead_control_flow.h | 2
-rw-r--r--  src/mesa/drivers/dri/i965/brw_defines.h | 39
-rw-r--r--  src/mesa/drivers/dri/i965/brw_disasm.c | 37
-rw-r--r--  src/mesa/drivers/dri/i965/brw_draw.c | 20
-rw-r--r--  src/mesa/drivers/dri/i965/brw_eu.h | 4
-rw-r--r--  src/mesa/drivers/dri/i965/brw_eu_compact.c | 6
-rw-r--r--  src/mesa/drivers/dri/i965/brw_eu_emit.c | 53
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs.cpp | 977
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs.h | 241
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_builder.h | 652
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp | 19
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 10
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 46
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_fp.cpp | 742
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 60
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 441
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp | 6
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 66
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp | 43
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2709
-rw-r--r--  src/mesa/drivers/dri/i965/brw_gs_surface_state.c | 10
-rw-r--r--  src/mesa/drivers/dri/i965/brw_inst.h | 14
-rw-r--r--  src/mesa/drivers/dri/i965/brw_ir_fs.h | 61
-rw-r--r--  src/mesa/drivers/dri/i965/brw_ir_vec4.h | 44
-rw-r--r--  src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp | 26
-rw-r--r--  src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 13
-rw-r--r--  src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c | 7
-rw-r--r--  src/mesa/drivers/dri/i965/brw_misc_state.c | 9
-rw-r--r--  src/mesa/drivers/dri/i965/brw_nir.c | 21
-rw-r--r--  src/mesa/drivers/dri/i965/brw_program.c | 69
-rw-r--r--  src/mesa/drivers/dri/i965/brw_queryobj.c | 18
-rw-r--r--  src/mesa/drivers/dri/i965/brw_reg.h | 22
-rw-r--r--  src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 18
-rw-r--r--  src/mesa/drivers/dri/i965/brw_sf_state.c | 6
-rw-r--r--  src/mesa/drivers/dri/i965/brw_shader.cpp | 125
-rw-r--r--  src/mesa/drivers/dri/i965/brw_shader.h | 25
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state.h | 14
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state_batch.c | 20
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state_dump.c | 317
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state_upload.c | 6
-rw-r--r--  src/mesa/drivers/dri/i965/brw_surface_formats.c | 442
-rw-r--r--  src/mesa/drivers/dri/i965/brw_tex_layout.c | 366
-rw-r--r--  src/mesa/drivers/dri/i965/brw_util.h | 38
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4.cpp | 78
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4.h | 26
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 8
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 43
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 40
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h | 5
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | 1
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 49
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_vp.cpp | 9
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 16
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs.c | 102
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs.h | 8
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vue_map.c | 148
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm.c | 7
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 12
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_clip_state.c | 10
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_gs_visitor.h | 7
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_multisample_state.c | 3
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_queryobj.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_scissor_state.c | 12
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_sf_state.c | 29
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_viewport_state.c | 5
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_wm_state.c | 3
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_gs_state.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_sf_state.c | 28
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_viewport_state.c | 5
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_vs_state.c | 48
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_wm_state.c | 7
-rw-r--r--  src/mesa/drivers/dri/i965/gen8_depth_state.c | 10
-rw-r--r--  src/mesa/drivers/dri/i965/gen8_gs_state.c | 10
-rw-r--r--  src/mesa/drivers/dri/i965/gen8_ps_state.c | 7
-rw-r--r--  src/mesa/drivers/dri/i965/gen8_sf_state.c | 9
-rw-r--r--  src/mesa/drivers/dri/i965/gen8_surface_state.c | 102
-rw-r--r--  src/mesa/drivers/dri/i965/gen8_viewport_state.c | 8
-rw-r--r--  src/mesa/drivers/dri/i965/gen8_vs_state.c | 6
-rw-r--r--  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 55
-rw-r--r--  src/mesa/drivers/dri/i965/intel_blit.c | 58
-rw-r--r--  src/mesa/drivers/dri/i965/intel_blit.h | 2
-rw-r--r--  src/mesa/drivers/dri/i965/intel_debug.c | 11
-rw-r--r--  src/mesa/drivers/dri/i965/intel_debug.h | 4
-rw-r--r--  src/mesa/drivers/dri/i965/intel_extensions.c | 6
-rw-r--r--  src/mesa/drivers/dri/i965/intel_fbo.c | 5
-rw-r--r--  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 307
-rw-r--r--  src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 37
-rw-r--r--  src/mesa/drivers/dri/i965/intel_pixel_draw.c | 15
-rw-r--r--  src/mesa/drivers/dri/i965/intel_pixel_read.c | 24
-rw-r--r--  src/mesa/drivers/dri/i965/intel_reg.h | 23
-rw-r--r--  src/mesa/drivers/dri/i965/intel_screen.c | 10
-rw-r--r--  src/mesa/drivers/dri/i965/intel_screen.h | 8
-rw-r--r--  src/mesa/drivers/dri/i965/intel_tex.c | 8
-rw-r--r--  src/mesa/drivers/dri/i965/intel_tex.h | 2
-rw-r--r--  src/mesa/drivers/dri/i965/intel_tex_image.c | 23
-rw-r--r--  src/mesa/drivers/dri/i965/intel_tex_validate.c | 9
-rw-r--r--  src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp | 123
-rw-r--r--  src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp | 90
-rw-r--r--  src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp | 22
-rw-r--r--  src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp | 22
-rw-r--r--  src/mesa/drivers/dri/nouveau/nouveau_fbo.c | 2
-rw-r--r--  src/mesa/drivers/dri/nouveau/nv10_state_tnl.c | 2
-rw-r--r--  src/mesa/drivers/dri/nouveau/nv20_state_tnl.c | 2
-rw-r--r--  src/mesa/drivers/dri/r200/r200_state.c | 4
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_common.c | 4
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_fbo.c | 2
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_state.c | 4
-rw-r--r--  src/mesa/drivers/dri/swrast/swrast.c | 5
-rw-r--r--  src/mesa/drivers/haiku/swrast/SConscript | 33
-rw-r--r--  src/mesa/drivers/haiku/swrast/SoftwareRast.cpp | 697
-rw-r--r--  src/mesa/drivers/haiku/swrast/SoftwareRast.h | 95
-rw-r--r--  src/mesa/drivers/haiku/swrast/SoftwareRast.rdef | 39
-rw-r--r--  src/mesa/drivers/osmesa/Makefile.am | 1
-rw-r--r--  src/mesa/drivers/x11/Makefile.am | 9
143 files changed, 4392 insertions, 6557 deletions
diff --git a/src/mesa/drivers/SConscript b/src/mesa/drivers/SConscript
index db656780c0b..5d654f538be 100644
--- a/src/mesa/drivers/SConscript
+++ b/src/mesa/drivers/SConscript
@@ -8,6 +8,3 @@ if env['dri']:
'dri/common/xmlpool/SConscript',
'dri/common/SConscript',
])
-
-if env['platform'] == 'haiku':
- SConscript('haiku/swrast/SConscript')
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index 0d094ddf4e6..71c1a763912 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -172,7 +172,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
driver->UnmapRenderbuffer = _swrast_unmap_soft_renderbuffer;
driver->RenderTexture = _swrast_render_texture;
driver->FinishRenderTexture = _swrast_finish_render_texture;
- driver->FramebufferRenderbuffer = _mesa_framebuffer_renderbuffer;
+ driver->FramebufferRenderbuffer = _mesa_FramebufferRenderbuffer_sw;
driver->ValidateFramebuffer = _mesa_validate_framebuffer;
driver->BlitFramebuffer = _swrast_BlitFramebuffer;
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index d2ab7b8ded9..214a68a9129 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -1211,7 +1211,8 @@ _mesa_meta_end(struct gl_context *ctx)
_mesa_BindRenderbuffer(GL_RENDERBUFFER, save->RenderbufferName);
if (state & MESA_META_DRAW_BUFFERS) {
- _mesa_drawbuffers(ctx, ctx->Const.MaxDrawBuffers, save->ColorDrawBuffers, NULL);
+ _mesa_drawbuffers(ctx, ctx->DrawBuffer, ctx->Const.MaxDrawBuffers,
+ save->ColorDrawBuffers, NULL);
}
ctx->Meta->SaveStackDepth--;
diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c
index bb2164276b2..9cace2b245a 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -82,7 +82,7 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
y_scale = samples * 0.5;
/* We expect only power of 2 samples in source multisample buffer. */
- assert(samples > 0 && (samples & (samples - 1)) == 0);
+ assert(samples > 0 && is_power_of_two(samples));
while (samples >> (shader_offset + 1)) {
shader_offset++;
}
@@ -263,7 +263,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
}
/* We expect only power of 2 samples in source multisample buffer. */
- assert(samples > 0 && (samples & (samples - 1)) == 0);
+ assert(samples > 0 && is_power_of_two(samples));
while (samples >> (shader_offset + 1)) {
shader_offset++;
}
@@ -434,7 +434,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
* (so the floating point exponent just gets increased), rather than
* doing a naive sum and dividing.
*/
- assert((samples & (samples - 1)) == 0);
+ assert(is_power_of_two(samples));
/* Fetch each individual sample. */
sample_resolve = rzalloc_size(mem_ctx, 1);
for (i = 0; i < samples; i++) {
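Aside: all three hunks keep the explicit "samples > 0" check and only replace the open-coded bit test with a named helper. As a standalone sketch of that idiom (illustrative only; the driver uses its own helper from Mesa's utility headers):

    #include <stdbool.h>

    /* A power of two has exactly one bit set, so clearing the lowest set
     * bit with n & (n - 1) leaves zero.  Callers above still guard
     * against n == 0 separately, as the asserts show.
     */
    static inline bool
    is_power_of_two(unsigned n)
    {
       return (n & (n - 1)) == 0;
    }
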
diff --git a/src/mesa/drivers/common/meta_tex_subimage.c b/src/mesa/drivers/common/meta_tex_subimage.c
index ad6e7873ecd..d2474f52718 100644
--- a/src/mesa/drivers/common/meta_tex_subimage.c
+++ b/src/mesa/drivers/common/meta_tex_subimage.c
@@ -34,6 +34,7 @@
#include "macros.h"
#include "meta.h"
#include "pbo.h"
+#include "readpix.h"
#include "shaderapi.h"
#include "state.h"
#include "teximage.h"
@@ -150,7 +151,8 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
bool success = false;
int z;
- if (!_mesa_is_bufferobj(packing->BufferObj) && !create_pbo)
+ if (!_mesa_is_bufferobj(packing->BufferObj) &&
+ (!create_pbo || pixels == NULL))
return false;
if (format == GL_DEPTH_COMPONENT ||
@@ -257,6 +259,7 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
GLuint pbo = 0, pbo_tex = 0, fbos[2] = { 0, 0 };
int full_height, image_height;
struct gl_texture_image *pbo_tex_image;
+ struct gl_renderbuffer *rb = NULL;
GLenum status;
bool success = false;
int z;
@@ -273,6 +276,13 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
if (ctx->_ImageTransferState)
return false;
+
+ if (!tex_image) {
+ rb = ctx->ReadBuffer->_ColorReadBuffer;
+ if (_mesa_need_rgb_to_luminance_conversion(rb->Format, format))
+ return false;
+ }
+
/* For arrays, use a tall (height * depth) 2D texture but taking into
* account the inter-image padding specified with the image height packing
* property.
diff --git a/src/mesa/drivers/dri/Makefile.am b/src/mesa/drivers/dri/Makefile.am
index fa1de103b56..08a8e645521 100644
--- a/src/mesa/drivers/dri/Makefile.am
+++ b/src/mesa/drivers/dri/Makefile.am
@@ -60,6 +60,7 @@ mesa_dri_drivers_la_LIBADD = \
../../libmesa.la \
common/libmegadriver_stub.la \
common/libdricommon.la \
+ common/libxmlconfig.la \
$(MEGADRIVERS_DEPS) \
$(DRI_LIB_DEPS) \
$()
diff --git a/src/mesa/drivers/dri/common/Android.mk b/src/mesa/drivers/dri/common/Android.mk
index a7fcd6d572a..6986f5e8cb4 100644
--- a/src/mesa/drivers/dri/common/Android.mk
+++ b/src/mesa/drivers/dri/common/Android.mk
@@ -39,7 +39,9 @@ intermediates := $(call local-generated-sources-dir)
LOCAL_C_INCLUDES := \
$(MESA_DRI_C_INCLUDES)
-LOCAL_EXPORT_C_INCLUDE_DIRS := $(intermediates)
+LOCAL_EXPORT_C_INCLUDE_DIRS := \
+ $(LOCAL_PATH) \
+ $(intermediates)
# swrast only
ifeq ($(MESA_GPU_DRIVERS),swrast)
@@ -48,7 +50,9 @@ else
LOCAL_SHARED_LIBRARIES := libdrm
endif
-LOCAL_SRC_FILES := $(DRI_COMMON_FILES)
+LOCAL_SRC_FILES := \
+ $(DRI_COMMON_FILES) \
+ $(XMLCONFIG_FILES)
MESA_DRI_OPTIONS_H := $(intermediates)/xmlpool/options.h
LOCAL_GENERATED_SOURCES := $(MESA_DRI_OPTIONS_H)
diff --git a/src/mesa/drivers/dri/common/Makefile.am b/src/mesa/drivers/dri/common/Makefile.am
index da8f97a980e..ae19fcb3565 100644
--- a/src/mesa/drivers/dri/common/Makefile.am
+++ b/src/mesa/drivers/dri/common/Makefile.am
@@ -33,16 +33,20 @@ AM_CFLAGS = \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
$(DEFINES) \
- $(EXPAT_CFLAGS) \
$(VISIBILITY_CFLAGS)
noinst_LTLIBRARIES = \
libdricommon.la \
+ libxmlconfig.la \
libmegadriver_stub.la \
libdri_test_stubs.la
libdricommon_la_SOURCES = $(DRI_COMMON_FILES)
+libxmlconfig_la_SOURCES = $(XMLCONFIG_FILES)
+libxmlconfig_la_CFLAGS = $(AM_CFLAGS) $(EXPAT_CFLAGS)
+libxmlconfig_la_LIBADD = $(EXPAT_LIBS) -lm
+
libdri_test_stubs_la_SOURCES = $(test_stubs_FILES)
libdri_test_stubs_la_CFLAGS = $(AM_CFLAGS) -DNO_MAIN
diff --git a/src/mesa/drivers/dri/common/Makefile.sources b/src/mesa/drivers/dri/common/Makefile.sources
index d00ec5f7334..d5d8da8fcee 100644
--- a/src/mesa/drivers/dri/common/Makefile.sources
+++ b/src/mesa/drivers/dri/common/Makefile.sources
@@ -2,7 +2,9 @@ DRI_COMMON_FILES := \
utils.c \
utils.h \
dri_util.c \
- dri_util.h \
+ dri_util.h
+
+XMLCONFIG_FILES := \
xmlconfig.c \
xmlconfig.h
diff --git a/src/mesa/drivers/dri/common/SConscript b/src/mesa/drivers/dri/common/SConscript
index 0bee1b41fc6..b402736db69 100644
--- a/src/mesa/drivers/dri/common/SConscript
+++ b/src/mesa/drivers/dri/common/SConscript
@@ -37,7 +37,7 @@ drienv.PkgUseModules('DRM')
# else
#env.Append(CPPDEFINES = ['__NOT_HAVE_DRM_H'])
-sources = drienv.ParseSourceList('Makefile.sources', 'DRI_COMMON_FILES')
+sources = drienv.ParseSourceList('Makefile.sources', ['DRI_COMMON_FILES', 'XMLCONFIG_FILES' ])
dri_common = drienv.ConvenienceLibrary(
target = 'dri_common',
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c
index d6e875fcfeb..e7ababe0b67 100644
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -162,13 +162,21 @@ driCreateNewScreen2(int scrn, int fd,
return NULL;
}
- int gl_version_override = _mesa_get_gl_version_override();
- if (gl_version_override >= 31) {
- psp->max_gl_core_version = MAX2(psp->max_gl_core_version,
- gl_version_override);
- } else {
- psp->max_gl_compat_version = MAX2(psp->max_gl_compat_version,
- gl_version_override);
+ struct gl_constants consts = { 0 };
+ gl_api api;
+ unsigned version;
+
+ api = API_OPENGLES2;
+ if (_mesa_override_gl_version_contextless(&consts, &api, &version))
+ psp->max_gl_es2_version = version;
+
+ api = API_OPENGL_COMPAT;
+ if (_mesa_override_gl_version_contextless(&consts, &api, &version)) {
+ if (api == API_OPENGL_CORE) {
+ psp->max_gl_core_version = version;
+ } else {
+ psp->max_gl_compat_version = version;
+ }
}
psp->api_mask = (1 << __DRI_API_OPENGL);
diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c
index 91da977acee..8ed8ff555ba 100644
--- a/src/mesa/drivers/dri/i915/i830_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i830_vtbl.c
@@ -730,9 +730,9 @@ i830_update_draw_buffer(struct intel_context *intel)
*/
if (ctx->NewState & _NEW_BUFFERS) {
/* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */
- _mesa_update_framebuffer(ctx);
+ _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer);
/* this updates the DrawBuffer's Width/Height if it's a FBO */
- _mesa_update_draw_buffer_bounds(ctx);
+ _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
}
if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index 9b002236add..03c32e56d82 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -220,7 +220,7 @@ get_result_flags(const struct prog_instruction *inst)
{
GLuint flags = 0;
- if (inst->SaturateMode == SATURATE_ZERO_ONE)
+ if (inst->Saturate)
flags |= A0_DEST_SATURATE;
if (inst->DstReg.WriteMask & WRITEMASK_X)
flags |= A0_DEST_CHANNEL_X;
diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c
index 97bf81ed759..80bd249fa7b 100644
--- a/src/mesa/drivers/dri/i915/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i915_vtbl.c
@@ -732,9 +732,9 @@ i915_update_draw_buffer(struct intel_context *intel)
*/
if (ctx->NewState & _NEW_BUFFERS) {
/* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */
- _mesa_update_framebuffer(ctx);
+ _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer);
/* this updates the DrawBuffer's Width/Height if it's a FBO */
- _mesa_update_draw_buffer_bounds(ctx);
+ _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
}
if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
diff --git a/src/mesa/drivers/dri/i915/intel_fbo.c b/src/mesa/drivers/dri/i915/intel_fbo.c
index 24c318049c4..a5d5c5832fb 100644
--- a/src/mesa/drivers/dri/i915/intel_fbo.c
+++ b/src/mesa/drivers/dri/i915/intel_fbo.c
@@ -427,7 +427,7 @@ intel_framebuffer_renderbuffer(struct gl_context * ctx,
{
DBG("Intel FramebufferRenderbuffer %u %u\n", fb->Name, rb ? rb->Name : 0);
- _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb);
+ _mesa_FramebufferRenderbuffer_sw(ctx, fb, attachment, rb);
intel_draw_buffer(ctx);
}
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am
index cf2424e34b4..9c947be88a0 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -48,6 +48,7 @@ libi965_dri_la_LIBADD = $(INTEL_LIBS)
TEST_LIBS = \
libi965_dri.la \
../common/libdricommon.la \
+ ../common/libxmlconfig.la \
../common/libmegadriver_stub.la \
../../../libmesa.la \
$(DRI_LIB_DEPS) \
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 1ae93e1d5f3..981fe79b132 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -18,9 +18,11 @@ i965_FILES = \
brw_clip_unfilled.c \
brw_clip_util.c \
brw_compute.c \
+ brw_conditional_render.c \
brw_context.c \
brw_context.h \
brw_cs.cpp \
+ brw_cs.h \
brw_cubemap_normalize.cpp \
brw_curbe.c \
brw_dead_control_flow.cpp \
@@ -40,6 +42,7 @@ i965_FILES = \
brw_ff_gs.c \
brw_ff_gs_emit.c \
brw_ff_gs.h \
+ brw_fs_builder.h \
brw_fs_channel_expressions.cpp \
brw_fs_cmod_propagation.cpp \
brw_fs_combine_constants.cpp \
@@ -47,7 +50,6 @@ i965_FILES = \
brw_fs.cpp \
brw_fs_cse.cpp \
brw_fs_dead_code_eliminate.cpp \
- brw_fs_fp.cpp \
brw_fs_generator.cpp \
brw_fs.h \
brw_fs_live_variables.cpp \
@@ -128,6 +130,7 @@ i965_FILES = \
brw_vs.h \
brw_vs_state.c \
brw_vs_surface_state.c \
+ brw_vue_map.c \
brw_wm.c \
brw_wm.h \
brw_wm_iz.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index c1b760920d9..789520c7353 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -29,7 +29,8 @@
brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw,
bool debug_flag)
: mem_ctx(ralloc_context(NULL)),
- generator(brw, mem_ctx, (void *) rzalloc(mem_ctx, struct brw_wm_prog_key),
+ generator(brw->intelScreen->compiler, brw,
+ mem_ctx, (void *) rzalloc(mem_ctx, struct brw_wm_prog_key),
(struct brw_stage_prog_data *) rzalloc(mem_ctx, struct brw_wm_prog_data),
NULL, 0, false, "BLORP")
{
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index 7e7770e43cd..f1f230e3751 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -141,12 +141,12 @@ bblock_t::combine_with(bblock_t *that)
}
void
-bblock_t::dump(backend_visitor *v) const
+bblock_t::dump(backend_shader *s) const
{
int ip = this->start_ip;
foreach_inst_in_block(backend_instruction, inst, this) {
fprintf(stderr, "%5d: ", ip);
- v->dump_instruction(inst);
+ s->dump_instruction(inst);
ip++;
}
}
@@ -231,6 +231,7 @@ cfg_t::cfg_t(exec_list *instructions)
if (cur_else) {
cur_else->add_successor(mem_ctx, cur_endif);
} else {
+ assert(cur_if != NULL);
cur_if->add_successor(mem_ctx, cur_endif);
}
@@ -299,6 +300,7 @@ cfg_t::cfg_t(exec_list *instructions)
inst->exec_node::remove();
cur->instructions.push_tail(inst);
+ assert(cur_do != NULL && cur_while != NULL);
cur->add_successor(mem_ctx, cur_do);
set_next_block(&cur, cur_while, ip);
@@ -411,7 +413,7 @@ cfg_t::make_block_array()
}
void
-cfg_t::dump(backend_visitor *v)
+cfg_t::dump(backend_shader *s)
{
if (idom_dirty)
calculate_idom();
@@ -423,8 +425,8 @@ cfg_t::dump(backend_visitor *v)
link->block->num);
}
fprintf(stderr, "\n");
- if (v != NULL)
- block->dump(v);
+ if (s != NULL)
+ block->dump(s);
fprintf(stderr, "END B%d", block->num);
foreach_list_typed(bblock_link, link, link, &block->children) {
fprintf(stderr, " ->B%d",
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h
index 56d7d07abdf..a09491781e6 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -60,7 +60,7 @@ struct bblock_t {
bool is_successor_of(const bblock_t *block) const;
bool can_combine_with(const bblock_t *that) const;
void combine_with(bblock_t *that);
- void dump(backend_visitor *v) const;
+ void dump(backend_shader *s) const;
backend_instruction *start();
const backend_instruction *start() const;
@@ -273,7 +273,7 @@ struct cfg_t {
void calculate_idom();
static bblock_t *intersect(bblock_t *b1, bblock_t *b2);
- void dump(backend_visitor *v);
+ void dump(backend_shader *s);
void dump_cfg();
void dump_domtree();
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index 12314204803..1d4ba3cac7e 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -121,8 +121,9 @@ brw_fast_clear_depth(struct gl_context *ctx)
* first.
*/
if ((ctx->Scissor.EnableFlags & 1) && !noop_scissor(ctx, fb)) {
- perf_debug("Failed to fast clear depth due to scissor being enabled. "
- "Possible 5%% performance win if avoided.\n");
+ perf_debug("Failed to fast clear %dx%d depth because of scissors. "
+ "Possible 5%% performance win if avoided.\n",
+ mt->logical_width0, mt->logical_height0);
return false;
}
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c
index 32238341aae..dee74dba8af 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@@ -32,6 +32,7 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
+#include "main/framebuffer.h"
static void
upload_clip_vp(struct brw_context *brw)
@@ -59,7 +60,9 @@ brw_upload_clip_unit(struct brw_context *brw)
struct brw_clip_unit_state *clip;
/* _NEW_BUFFERS */
- struct gl_framebuffer *fb = ctx->DrawBuffer;
+ const struct gl_framebuffer *fb = ctx->DrawBuffer;
+ const float fb_width = (float)_mesa_geometric_width(fb);
+ const float fb_height = (float)_mesa_geometric_height(fb);
upload_clip_vp(brw);
@@ -127,8 +130,8 @@ brw_upload_clip_unit(struct brw_context *brw)
/* enable guardband clipping if we can */
if (ctx->ViewportArray[0].X == 0 &&
ctx->ViewportArray[0].Y == 0 &&
- ctx->ViewportArray[0].Width == (float) fb->Width &&
- ctx->ViewportArray[0].Height == (float) fb->Height)
+ ctx->ViewportArray[0].Width == fb_width &&
+ ctx->ViewportArray[0].Height == fb_height)
{
clip->clip5.guard_band_enable = 1;
clip->clip6.clipper_viewport_state_ptr =
diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index b3d6de51adc..5693ab507d4 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -45,7 +45,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
unsigned thread_width_max =
(group_size + simd_size - 1) / simd_size;
- uint32_t right_mask = (1u << simd_size) - 1;
+ uint32_t right_mask = 0xffffffffu >> (32 - simd_size);
const unsigned right_non_aligned = group_size & (simd_size - 1);
if (right_non_aligned != 0)
right_mask >>= (simd_size - right_non_aligned);
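The rewritten mask avoids shifting a 32-bit value by 32 when simd_size is 32, which is undefined behaviour in C, while producing the same result for the narrower widths. A minimal standalone check of the new expression (illustrative only, not driver code):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       for (unsigned simd_size = 8; simd_size <= 32; simd_size *= 2) {
          uint32_t right_mask = 0xffffffffu >> (32 - simd_size);
          /* matches the old (1u << simd_size) - 1 wherever that was defined */
          if (simd_size < 32)
             assert(right_mask == (1u << simd_size) - 1);
          else
             assert(right_mask == 0xffffffffu);
       }
       return 0;
    }
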
diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c b/src/mesa/drivers/dri/i965/brw_conditional_render.c
new file mode 100644
index 00000000000..6d37c3b6928
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_conditional_render.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Neil Roberts <[email protected]>
+ */
+
+/** @file brw_conditional_render.c
+ *
+ * Support for conditional rendering based on query objects
+ * (GL_NV_conditional_render, GL_ARB_conditional_render_inverted) on Gen7+.
+ */
+
+#include "main/imports.h"
+#include "main/condrender.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+set_predicate_enable(struct brw_context *brw,
+ bool value)
+{
+ if (value)
+ brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
+ else
+ brw->predicate.state = BRW_PREDICATE_STATE_DONT_RENDER;
+}
+
+static void
+set_predicate_for_result(struct brw_context *brw,
+ struct brw_query_object *query,
+ bool inverted)
+{
+ int load_op;
+
+ assert(query->bo != NULL);
+
+ brw_load_register_mem64(brw,
+ MI_PREDICATE_SRC0,
+ query->bo,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ 0, /* write domain */
+ 0 /* offset */);
+ brw_load_register_mem64(brw,
+ MI_PREDICATE_SRC1,
+ query->bo,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ 0, /* write domain */
+ 8 /* offset */);
+
+ if (inverted)
+ load_op = MI_PREDICATE_LOADOP_LOAD;
+ else
+ load_op = MI_PREDICATE_LOADOP_LOADINV;
+
+ BEGIN_BATCH(1);
+ OUT_BATCH(GEN7_MI_PREDICATE |
+ load_op |
+ MI_PREDICATE_COMBINEOP_SET |
+ MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
+ ADVANCE_BATCH();
+
+ brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
+}
+
+static void
+brw_begin_conditional_render(struct gl_context *ctx,
+ struct gl_query_object *q,
+ GLenum mode)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_query_object *query = (struct brw_query_object *) q;
+ bool inverted;
+
+ if (!brw->predicate.supported)
+ return;
+
+ switch (mode) {
+ case GL_QUERY_WAIT:
+ case GL_QUERY_NO_WAIT:
+ case GL_QUERY_BY_REGION_WAIT:
+ case GL_QUERY_BY_REGION_NO_WAIT:
+ inverted = false;
+ break;
+ case GL_QUERY_WAIT_INVERTED:
+ case GL_QUERY_NO_WAIT_INVERTED:
+ case GL_QUERY_BY_REGION_WAIT_INVERTED:
+ case GL_QUERY_BY_REGION_NO_WAIT_INVERTED:
+ inverted = true;
+ break;
+ default:
+ unreachable("Unexpected conditional render mode");
+ }
+
+ /* If there are already samples from a BLT operation or if the query object
+ * is ready then we can avoid looking at the values in the buffer and just
+ * decide whether to draw using the CPU without stalling.
+ */
+ if (query->Base.Result || query->Base.Ready)
+ set_predicate_enable(brw, (query->Base.Result != 0) ^ inverted);
+ else
+ set_predicate_for_result(brw, query, inverted);
+}
+
+static void
+brw_end_conditional_render(struct gl_context *ctx,
+ struct gl_query_object *q)
+{
+ struct brw_context *brw = brw_context(ctx);
+
+ /* When there is no longer a conditional render in progress it should
+ * always render.
+ */
+ brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
+}
+
+void
+brw_init_conditional_render_functions(struct dd_function_table *functions)
+{
+ functions->BeginConditionalRender = brw_begin_conditional_render;
+ functions->EndConditionalRender = brw_end_conditional_render;
+}
+
+bool
+brw_check_conditional_render(struct brw_context *brw)
+{
+ if (brw->predicate.supported) {
+ /* In some cases it is possible to determine that the primitives should
+ * be skipped without needing the predicate enable bit and still without
+ * stalling.
+ */
+ return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER;
+ } else if (brw->ctx.Query.CondRenderQuery) {
+ perf_debug("Conditional rendering is implemented in software and may "
+ "stall.\n");
+ return _mesa_check_conditional_render(&brw->ctx);
+ } else {
+ return true;
+ }
+}
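For context, this is the GL-level pattern the new file accelerates on Gen7+: instead of the old software fallback that read the query result on the CPU, the result is loaded into MI_PREDICATE_SRC0/SRC1 and the predicate bit gates the 3DPRIMITIVE commands. A sketch of typical application usage (GL 3.0+ headers assumed; the draw helpers are hypothetical):

    void draw_with_occlusion_culling(void)
    {
       GLuint q;
       glGenQueries(1, &q);

       glBeginQuery(GL_SAMPLES_PASSED, q);
       draw_bounding_box();            /* hypothetical cheap proxy draw */
       glEndQuery(GL_SAMPLES_PASSED);

       /* The draw below is skipped when the query returned zero samples. */
       glBeginConditionalRender(q, GL_QUERY_WAIT);
       draw_expensive_object();        /* hypothetical full draw */
       glEndConditionalRender();

       glDeleteQueries(1, &q);
    }
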
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 23838056690..ebf12fab69e 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -50,6 +50,7 @@
#include "brw_context.h"
#include "brw_defines.h"
+#include "brw_shader.h"
#include "brw_draw.h"
#include "brw_state.h"
@@ -68,8 +69,6 @@
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
-#include "glsl/nir/nir.h"
-
/***************************************
* Mesa's Driver Functions
***************************************/
@@ -289,6 +288,8 @@ brw_init_driver_functions(struct brw_context *brw,
else
gen4_init_queryobj_functions(functions);
brw_init_compute_functions(functions);
+ if (brw->gen >= 7)
+ brw_init_conditional_render_functions(functions);
functions->QuerySamplesForFormat = brw_query_samples_for_format;
@@ -427,11 +428,7 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx->Const.MinLineWidth = 1.0;
ctx->Const.MinLineWidthAA = 1.0;
- if (brw->gen >= 9 || brw->is_cherryview) {
- ctx->Const.MaxLineWidth = 40.0;
- ctx->Const.MaxLineWidthAA = 40.0;
- ctx->Const.LineWidthGranularity = 0.125;
- } else if (brw->gen >= 6) {
+ if (brw->gen >= 6) {
ctx->Const.MaxLineWidth = 7.375;
ctx->Const.MaxLineWidthAA = 7.375;
ctx->Const.LineWidthGranularity = 0.125;
@@ -441,6 +438,13 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx->Const.LineWidthGranularity = 0.5;
}
+ /* For non-antialiased lines, we have to round the line width to the
+ * nearest whole number. Make sure that we don't advertise a line
+ * width that, when rounded, will be beyond the actual hardware
+ * maximum.
+ */
+ assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);
+
ctx->Const.MinPointSize = 1.0;
ctx->Const.MinPointSizeAA = 1.0;
ctx->Const.MaxPointSize = 255.0;
@@ -544,6 +548,7 @@ brw_initialize_context_constants(struct brw_context *brw)
*/
ctx->Const.UniformBufferOffsetAlignment = 16;
ctx->Const.TextureBufferOffsetAlignment = 16;
+ ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
if (brw->gen >= 6) {
ctx->Const.MaxVarying = 32;
@@ -553,51 +558,12 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
}
- static const nir_shader_compiler_options nir_options = {
- .native_integers = true,
- /* In order to help allow for better CSE at the NIR level we tell NIR
- * to split all ffma instructions during opt_algebraic and we then
- * re-combine them as a later step.
- */
- .lower_ffma = true,
- .lower_sub = true,
- };
-
/* We want the GLSL compiler to emit code that uses condition codes */
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
- ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
- ctx->Const.ShaderCompilerOptions[i].EmitCondCodes = true;
- ctx->Const.ShaderCompilerOptions[i].EmitNoNoise = true;
- ctx->Const.ShaderCompilerOptions[i].EmitNoMainReturn = true;
- ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectInput = true;
- ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectOutput =
- (i == MESA_SHADER_FRAGMENT);
- ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectTemp =
- (i == MESA_SHADER_FRAGMENT);
- ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
- ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true;
+ ctx->Const.ShaderCompilerOptions[i] =
+ brw->intelScreen->compiler->glsl_compiler_options[i];
}
- ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
- ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
-
- if (brw->scalar_vs) {
- /* If we're using the scalar backend for vertex shaders, we need to
- * configure these accordingly.
- */
- ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
- ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
- ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false;
-
- if (brw_env_var_as_boolean("INTEL_USE_NIR", true))
- ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions = &nir_options;
- }
-
- if (brw_env_var_as_boolean("INTEL_USE_NIR", true))
- ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions = &nir_options;
-
- ctx->Const.ShaderCompilerOptions[MESA_SHADER_COMPUTE].NirOptions = &nir_options;
-
/* ARB_viewport_array */
if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
@@ -612,6 +578,12 @@ brw_initialize_context_constants(struct brw_context *brw)
/* ARB_gpu_shader5 */
if (brw->gen >= 7)
ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
+
+ /* ARB_framebuffer_no_attachments */
+ ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth;
+ ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight;
+ ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
+ ctx->Const.MaxFramebufferSamples = max_samples;
}
static void
@@ -814,10 +786,9 @@ brwCreateContext(gl_api api,
_mesa_meta_init(ctx);
brw_process_driconf_options(brw);
- brw_process_intel_debug_variable(brw);
- if (brw->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
- brw->scalar_vs = true;
+ if (INTEL_DEBUG & DEBUG_PERF)
+ brw->perf_debug = true;
brw_initialize_context_constants(brw);
@@ -894,6 +865,8 @@ brwCreateContext(gl_api api,
brw->gs.enabled = false;
brw->sf.viewport_transform_enable = true;
+ brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
+
ctx->VertexProgram._MaintainTnlProgram = true;
ctx->FragmentProgram._MaintainTexEnvProgram = true;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index cb4cc7fb36b..9e1f722df9e 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -611,6 +611,12 @@ struct brw_ff_gs_prog_data {
unsigned svbi_postincrement_value;
};
+enum shader_dispatch_mode {
+ DISPATCH_MODE_4X1_SINGLE = 0,
+ DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
+ DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
+ DISPATCH_MODE_SIMD8 = 3,
+};
/* Note: brw_vue_prog_data_compare() must be updated when adding fields to
* this struct!
@@ -628,7 +634,7 @@ struct brw_vue_prog_data {
*/
GLuint urb_entry_size;
- bool simd8;
+ enum shader_dispatch_mode dispatch_mode;
};
@@ -726,14 +732,6 @@ struct brw_gs_prog_data
int invocations;
/**
- * Dispatch mode, can be any of:
- * GEN7_GS_DISPATCH_MODE_DUAL_OBJECT
- * GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE
- * GEN7_GS_DISPATCH_MODE_SINGLE
- */
- int dispatch_mode;
-
- /**
* Gen6 transform feedback enabled flag.
*/
bool gen6_xfb_enabled;
@@ -829,20 +827,10 @@ struct brw_tracked_state {
enum shader_time_shader_type {
ST_NONE,
ST_VS,
- ST_VS_WRITTEN,
- ST_VS_RESET,
ST_GS,
- ST_GS_WRITTEN,
- ST_GS_RESET,
ST_FS8,
- ST_FS8_WRITTEN,
- ST_FS8_RESET,
ST_FS16,
- ST_FS16_WRITTEN,
- ST_FS16_RESET,
ST_CS,
- ST_CS_WRITTEN,
- ST_CS_RESET,
};
struct brw_vertex_buffer {
@@ -972,6 +960,22 @@ struct brw_stage_state
uint32_t sampler_offset;
};
+enum brw_predicate_state {
+ /* The first two states are used if we can determine whether to draw
+ * without having to look at the values in the query object buffer. This
+ * will happen if there is no conditional render in progress, if the query
+ * object is already completed or if something else has already added
+ * samples to the preliminary result such as via a BLT command.
+ */
+ BRW_PREDICATE_STATE_RENDER,
+ BRW_PREDICATE_STATE_DONT_RENDER,
+ /* In this case whether to draw or not depends on the result of an
+ * MI_PREDICATE command so the predicate enable bit needs to be checked.
+ */
+ BRW_PREDICATE_STATE_USE_BIT
+};
+
+struct shader_times;
/**
* brw_context is derived from gl_context.
@@ -1131,7 +1135,6 @@ struct brw_context
bool has_pln;
bool no_simd8;
bool use_rep_send;
- bool scalar_vs;
/**
* Some versions of Gen hardware don't do centroid interpolation correctly
@@ -1408,6 +1411,11 @@ struct brw_context
} query;
struct {
+ enum brw_predicate_state state;
+ bool supported;
+ } predicate;
+
+ struct {
/** A map from pipeline statistics counter IDs to MMIO addresses. */
const int *statistics_registers;
@@ -1453,6 +1461,7 @@ struct brw_context
uint32_t offset;
uint32_t size;
enum aub_state_struct_type type;
+ int index;
} *state_batch_list;
int state_batch_count;
@@ -1492,7 +1501,7 @@ struct brw_context
const char **names;
int *ids;
enum shader_time_shader_type *types;
- uint64_t *cumulative;
+ struct shader_times *cumulative;
int num_entries;
int max_entries;
double report_time;
@@ -1606,12 +1615,21 @@ void brw_write_depth_count(struct brw_context *brw, drm_intel_bo *bo, int idx);
void brw_store_register_mem64(struct brw_context *brw,
drm_intel_bo *bo, uint32_t reg, int idx);
+/** brw_conditional_render.c */
+void brw_init_conditional_render_functions(struct dd_function_table *functions);
+bool brw_check_conditional_render(struct brw_context *brw);
+
/** intel_batchbuffer.c */
void brw_load_register_mem(struct brw_context *brw,
uint32_t reg,
drm_intel_bo *bo,
uint32_t read_domains, uint32_t write_domain,
uint32_t offset);
+void brw_load_register_mem64(struct brw_context *brw,
+ uint32_t reg,
+ drm_intel_bo *bo,
+ uint32_t read_domains, uint32_t write_domain,
+ uint32_t offset);
/*======================================================================
* brw_state_dump.c
@@ -1991,6 +2009,10 @@ void intel_context_destroy(struct brw_context *brw);
void
brw_initialize_context_constants(struct brw_context *brw);
+bool
+gen9_use_linear_1d_layout(const struct brw_context *brw,
+ const struct intel_mipmap_tree *mt);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
index 2432875d0f4..42a082b57b6 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -88,9 +88,15 @@ brw_cs_emit(struct brw_context *brw,
cfg_t *cfg = NULL;
const char *fail_msg = NULL;
+ int st_index = -1;
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);
+
/* Now the main event: Visit the shader IR and generate our CS IR for it.
*/
- fs_visitor v8(brw, mem_ctx, key, prog_data, prog, cp, 8);
+ fs_visitor v8(brw->intelScreen->compiler, brw,
+ mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+ &cp->Base, 8, st_index);
if (!v8.run_cs()) {
fail_msg = v8.fail_msg;
} else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
@@ -98,7 +104,9 @@ brw_cs_emit(struct brw_context *brw,
prog_data->simd_size = 8;
}
- fs_visitor v16(brw, mem_ctx, key, prog_data, prog, cp, 16);
+ fs_visitor v16(brw->intelScreen->compiler, brw,
+ mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+ &cp->Base, 16, st_index);
if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
!fail_msg && !v8.simd16_unsupported &&
local_workgroup_size <= 16 * brw->max_cs_threads) {
@@ -126,7 +134,8 @@ brw_cs_emit(struct brw_context *brw,
return NULL;
}
- fs_generator g(brw, mem_ctx, (void*) key, &prog_data->base, &cp->Base,
+ fs_generator g(brw->intelScreen->compiler, brw,
+ mem_ctx, (void*) key, &prog_data->base, &cp->Base,
v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
if (INTEL_DEBUG & DEBUG_CS) {
char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
@@ -368,9 +377,11 @@ brw_upload_cs_state(struct brw_context *brw)
extern "C"
const struct brw_tracked_state brw_cs_state = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_CS_PROG_DATA,
+ /* explicit initialisers aren't valid C++, comment
+ * them for documentation purposes */
+ /* .dirty = */{
+ /* .mesa = */ 0,
+ /* .brw = */ BRW_NEW_CS_PROG_DATA,
},
- .emit = brw_upload_cs_state
+ /* .emit = */ brw_upload_cs_state
};
diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
index 03f838dd9ae..61f25811cb2 100644
--- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
+++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
@@ -36,11 +36,11 @@
* - if/else/endif
*/
bool
-dead_control_flow_eliminate(backend_visitor *v)
+dead_control_flow_eliminate(backend_shader *s)
{
bool progress = false;
- foreach_block_safe (block, v->cfg) {
+ foreach_block_safe (block, s->cfg) {
bblock_t *if_block = NULL, *else_block = NULL, *endif_block = block;
bool found = false;
@@ -115,7 +115,7 @@ dead_control_flow_eliminate(backend_visitor *v)
}
if (progress)
- v->invalidate_live_intervals();
+ s->invalidate_live_intervals();
return progress;
}
diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.h b/src/mesa/drivers/dri/i965/brw_dead_control_flow.h
index 57a4dabc83c..83fd9b1e79e 100644
--- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.h
+++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.h
@@ -23,4 +23,4 @@
#include "brw_shader.h"
-bool dead_control_flow_eliminate(backend_visitor *v);
+bool dead_control_flow_eliminate(backend_shader *s);
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 3c704ee9d08..c113d52a3d3 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -38,6 +38,7 @@
fieldval & field ## _MASK; \
})
+#define GET_BITS(data, high, low) ((data & INTEL_MASK((high), (low))) >> (low))
#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)
#ifndef BRW_DEFINES_H
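GET_BITS complements GET_FIELD: it extracts a bit range by explicit high/low positions rather than through a named *_SHIFT/*_MASK pair. For example (the variable name is hypothetical), pulling the Gen8 QPITCH field out of surface-state DW1 per the defines added further down:

    uint32_t qpitch = GET_BITS(surf_dw1, 14, 0);   /* bits 14:0 of DW1 */
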
@@ -51,6 +52,7 @@
# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15)
# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15)
# define GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE (1 << 10)
+# define GEN7_3DPRIM_PREDICATE_ENABLE (1 << 8)
/* DW1 */
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8)
@@ -530,9 +532,11 @@
#define GEN7_SURFACE_ARYSPC_FULL (0 << 10)
#define GEN7_SURFACE_ARYSPC_LOD0 (1 << 10)
-/* Surface state DW0 */
+/* Surface state DW1 */
#define GEN8_SURFACE_MOCS_SHIFT 24
#define GEN8_SURFACE_MOCS_MASK INTEL_MASK(30, 24)
+#define GEN8_SURFACE_QPITCH_SHIFT 0
+#define GEN8_SURFACE_QPITCH_MASK INTEL_MASK(14, 0)
/* Surface state DW2 */
#define BRW_SURFACE_HEIGHT_SHIFT 19
@@ -590,6 +594,15 @@
#define GEN7_SURFACE_MOCS_SHIFT 16
#define GEN7_SURFACE_MOCS_MASK INTEL_MASK(19, 16)
+#define GEN9_SURFACE_TRMODE_SHIFT 18
+#define GEN9_SURFACE_TRMODE_MASK INTEL_MASK(19, 18)
+#define GEN9_SURFACE_TRMODE_NONE 0
+#define GEN9_SURFACE_TRMODE_TILEYF 1
+#define GEN9_SURFACE_TRMODE_TILEYS 2
+
+#define GEN9_SURFACE_MIP_TAIL_START_LOD_SHIFT 8
+#define GEN9_SURFACE_MIP_TAIL_START_LOD_MASK INTEL_MASK(11, 8)
+
/* Surface state DW6 */
#define GEN7_SURFACE_MCS_ENABLE (1 << 0)
#define GEN7_SURFACE_MCS_PITCH_SHIFT 3
@@ -606,6 +619,8 @@
#define GEN8_SURFACE_AUX_MODE_HIZ 3
/* Surface state DW7 */
+#define GEN9_SURFACE_RT_COMPRESSION_SHIFT 30
+#define GEN9_SURFACE_RT_COMPRESSION_MASK INTEL_MASK(30, 30)
#define GEN7_SURFACE_CLEAR_COLOR_SHIFT 28
#define GEN7_SURFACE_SCS_R_SHIFT 25
#define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25)
@@ -1131,6 +1146,11 @@ enum opcode {
* Terminate the compute shader.
*/
CS_OPCODE_CS_TERMINATE,
+
+ /**
+ * GLSL barrier()
+ */
+ SHADER_OPCODE_BARRIER,
};
enum brw_urb_write_flags {
@@ -1592,6 +1612,14 @@ enum brw_message_target {
#define BRW_SCRATCH_SPACE_SIZE_1M 10
#define BRW_SCRATCH_SPACE_SIZE_2M 11
+#define BRW_MESSAGE_GATEWAY_SFID_OPEN_GATEWAY 0
+#define BRW_MESSAGE_GATEWAY_SFID_CLOSE_GATEWAY 1
+#define BRW_MESSAGE_GATEWAY_SFID_FORWARD_MSG 2
+#define BRW_MESSAGE_GATEWAY_SFID_GET_TIMESTAMP 3
+#define BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG 4
+#define BRW_MESSAGE_GATEWAY_SFID_UPDATE_GATEWAY_STATE 5
+#define BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE 6
+
#define CMD_URB_FENCE 0x6000
#define CMD_CS_URB_STATE 0x6001
@@ -1769,9 +1797,8 @@ enum brw_message_target {
# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1
# define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT 20
# define GEN7_GS_INSTANCE_CONTROL_SHIFT 15
-# define GEN7_GS_DISPATCH_MODE_SINGLE (0 << 11)
-# define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE (1 << 11)
-# define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT (2 << 11)
+# define GEN7_GS_DISPATCH_MODE_SHIFT 11
+# define GEN7_GS_DISPATCH_MODE_MASK INTEL_MASK(12, 11)
# define GEN6_GS_STATISTICS_ENABLE (1 << 10)
# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9)
# define GEN6_GS_RENDERING_ENABLE (1 << 8)
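With the three hard-coded dispatch-mode bit patterns replaced by a SHIFT/MASK pair, the GS dispatch mode can be packed directly from the new shader_dispatch_mode enum declared in brw_context.h above. A hedged sketch of what a state-upload call site might look like (the actual gen7 GS atom is not part of this excerpt):

    uint32_t dw5 = 0;   /* hypothetical dword of the 3DSTATE_GS packet */
    dw5 |= SET_FIELD(gs_prog_data->base.dispatch_mode, GEN7_GS_DISPATCH_MODE);
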
@@ -2470,8 +2497,8 @@ enum brw_wm_barycentric_interp_mode {
* cache settings. We still use only either write-back or write-through; and
* rely on the documented default values.
*/
-#define SKL_MOCS_WB 9
-#define SKL_MOCS_WT 5
+#define SKL_MOCS_WB (0b001001 << 1)
+#define SKL_MOCS_WT (0b000101 << 1)
#define MEDIA_VFE_STATE 0x7000
/* GEN7 DW2, GEN8+ DW3 */
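Numerically the MOCS change is just a one-bit pre-shift of the old values; the apparent intent (an assumption, not stated in this excerpt) is that the defines now carry the cacheability index already positioned within the MOCS field, so users of SKL_MOCS_WB/WT no longer shift it themselves:

    /* Worked out:
     *   old SKL_MOCS_WB = 9  ->  new (0b001001 << 1) = 18 = 9 << 1
     *   old SKL_MOCS_WT = 5  ->  new (0b000101 << 1) = 10 = 5 << 1
     */
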
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 95e262a361b..1075c5acba5 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -402,6 +402,16 @@ static const char *const gen6_sfid[16] = {
[HSW_SFID_CRE] = "cre",
};
+static const char *const gen7_gateway_subfuncid[8] = {
+ [BRW_MESSAGE_GATEWAY_SFID_OPEN_GATEWAY] = "open",
+ [BRW_MESSAGE_GATEWAY_SFID_CLOSE_GATEWAY] = "close",
+ [BRW_MESSAGE_GATEWAY_SFID_FORWARD_MSG] = "forward msg",
+ [BRW_MESSAGE_GATEWAY_SFID_GET_TIMESTAMP] = "get timestamp",
+ [BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG] = "barrier msg",
+ [BRW_MESSAGE_GATEWAY_SFID_UPDATE_GATEWAY_STATE] = "update state",
+ [BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE] = "mmio read/write",
+};
+
static const char *const dp_write_port_msg_type[8] = {
[0b000] = "OWord block write",
[0b001] = "OWord dual block write",
@@ -977,13 +987,14 @@ src0_3src(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst)
brw_inst_3src_src0_reg_nr(devinfo, inst));
if (err == -1)
return 0;
- if (src0_subreg_nr)
+ if (src0_subreg_nr || brw_inst_3src_src0_rep_ctrl(devinfo, inst))
format(file, ".%d", src0_subreg_nr);
if (brw_inst_3src_src0_rep_ctrl(devinfo, inst))
string(file, "<0,1,0>");
- else
+ else {
string(file, "<4,4,1>");
- err |= src_swizzle(file, brw_inst_3src_src0_swizzle(devinfo, inst));
+ err |= src_swizzle(file, brw_inst_3src_src0_swizzle(devinfo, inst));
+ }
err |= control(file, "src da16 reg type", three_source_reg_encoding,
brw_inst_3src_src_type(devinfo, inst), NULL);
return err;
@@ -1003,13 +1014,14 @@ src1_3src(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst)
brw_inst_3src_src1_reg_nr(devinfo, inst));
if (err == -1)
return 0;
- if (src1_subreg_nr)
+ if (src1_subreg_nr || brw_inst_3src_src1_rep_ctrl(devinfo, inst))
format(file, ".%d", src1_subreg_nr);
if (brw_inst_3src_src1_rep_ctrl(devinfo, inst))
string(file, "<0,1,0>");
- else
+ else {
string(file, "<4,4,1>");
- err |= src_swizzle(file, brw_inst_3src_src1_swizzle(devinfo, inst));
+ err |= src_swizzle(file, brw_inst_3src_src1_swizzle(devinfo, inst));
+ }
err |= control(file, "src da16 reg type", three_source_reg_encoding,
brw_inst_3src_src_type(devinfo, inst), NULL);
return err;
@@ -1030,13 +1042,14 @@ src2_3src(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst)
brw_inst_3src_src2_reg_nr(devinfo, inst));
if (err == -1)
return 0;
- if (src2_subreg_nr)
+ if (src2_subreg_nr || brw_inst_3src_src2_rep_ctrl(devinfo, inst))
format(file, ".%d", src2_subreg_nr);
if (brw_inst_3src_src2_rep_ctrl(devinfo, inst))
string(file, "<0,1,0>");
- else
+ else {
string(file, "<4,4,1>");
- err |= src_swizzle(file, brw_inst_3src_src2_swizzle(devinfo, inst));
+ err |= src_swizzle(file, brw_inst_3src_src2_swizzle(devinfo, inst));
+ }
err |= control(file, "src da16 reg type", three_source_reg_encoding,
brw_inst_3src_src_type(devinfo, inst), NULL);
return err;
@@ -1495,6 +1508,12 @@ brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo,
break;
case BRW_SFID_THREAD_SPAWNER:
break;
+
+ case BRW_SFID_MESSAGE_GATEWAY:
+ format(file, " (%s)",
+ gen7_gateway_subfuncid[brw_inst_gateway_subfuncid(devinfo, inst)]);
+ break;
+
case GEN7_SFID_DATAPORT_DATA_CACHE:
if (devinfo->gen >= 7) {
format(file, " (");
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 96e23697923..b91597a9f5d 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -92,8 +92,10 @@ get_hw_prim_for_gl_prim(int mode)
{
if (mode >= BRW_PRIM_OFFSET)
return mode - BRW_PRIM_OFFSET;
- else
+ else {
+ assert(mode < ARRAY_SIZE(prim_to_hw_prim));
return prim_to_hw_prim[mode];
+ }
}
@@ -178,6 +180,7 @@ static void brw_emit_prim(struct brw_context *brw,
int verts_per_instance;
int vertex_access_type;
int indirect_flag;
+ int predicate_enable;
DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
prim->start, prim->count);
@@ -258,10 +261,14 @@ static void brw_emit_prim(struct brw_context *brw,
indirect_flag = 0;
}
-
if (brw->gen >= 7) {
+ if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
+ predicate_enable = GEN7_3DPRIM_PREDICATE_ENABLE;
+ else
+ predicate_enable = 0;
+
BEGIN_BATCH(7);
- OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag);
+ OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable);
OUT_BATCH(hw_prim | vertex_access_type);
} else {
BEGIN_BATCH(6);
@@ -561,12 +568,7 @@ void brw_draw_prims( struct gl_context *ctx,
assert(unused_tfb_object == NULL);
- if (ctx->Query.CondRenderQuery) {
- perf_debug("Conditional rendering is implemented in software and may "
- "stall. This should be fixed in the driver.\n");
- }
-
- if (!_mesa_check_conditional_render(ctx))
+ if (!brw_check_conditional_render(brw))
return;
/* Handle primitive restart if needed */
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 0e7be1e1ea0..761aa0ec5fa 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -361,6 +361,8 @@ brw_jump_scale(const struct brw_device_info *devinfo)
return 1;
}
+void brw_barrier(struct brw_codegen *p, struct brw_reg src);
+
/* If/else/endif. Works by manipulating the execution flags on each
* channel.
*/
@@ -390,6 +392,8 @@ brw_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index,
void brw_NOP(struct brw_codegen *p);
+void brw_WAIT(struct brw_codegen *p);
+
/* Special case: there is never a destination, execution size will be
* taken from src0:
*/
diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index 69cb114b945..67f0b45ac04 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -849,6 +849,12 @@ set_3src_source_index(const struct brw_device_info *devinfo,
static bool
has_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src)
{
+ /* EOT can only be mapped on a send if the src1 is an immediate */
+ if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||
+ brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) &&
+ brw_inst_eot(devinfo, src))
+ return true;
+
/* Check for instruction bits that don't map to any of the fields of the
* compacted instruction. The instruction cannot be compacted if any of
* them are set. They overlap with:
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index e78d0bec268..0f536046f6f 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -914,6 +914,8 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
brw_inst_set_3src_src_type(devinfo, inst, BRW_3SRC_TYPE_UD);
brw_inst_set_3src_dst_type(devinfo, inst, BRW_3SRC_TYPE_UD);
break;
+ default:
+ unreachable("not reached");
}
}
@@ -3404,3 +3406,54 @@ void brw_shader_time_add(struct brw_codegen *p,
brw_pop_insn_state(p);
}
+
+
+/**
+ * Emit the SEND message for a barrier
+ */
+void
+brw_barrier(struct brw_codegen *p, struct brw_reg src)
+{
+ const struct brw_device_info *devinfo = p->devinfo;
+ struct brw_inst *inst;
+
+ assert(devinfo->gen >= 7);
+
+ inst = next_insn(p, BRW_OPCODE_SEND);
+ brw_set_dest(p, inst, brw_null_reg());
+ brw_set_src0(p, inst, src);
+ brw_set_src1(p, inst, brw_null_reg());
+
+ brw_set_message_descriptor(p, inst, BRW_SFID_MESSAGE_GATEWAY,
+ 1 /* msg_length */,
+ 0 /* response_length */,
+ false /* header_present */,
+ false /* end_of_thread */);
+
+ brw_inst_set_gateway_notify(devinfo, inst, 1);
+ brw_inst_set_gateway_subfuncid(devinfo, inst,
+ BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
+
+ brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
+}
+
+
+/**
+ * Emit the wait instruction for a barrier
+ */
+void
+brw_WAIT(struct brw_codegen *p)
+{
+ const struct brw_device_info *devinfo = p->devinfo;
+ struct brw_inst *insn;
+
+ struct brw_reg src = brw_notification_reg();
+
+ insn = next_insn(p, BRW_OPCODE_WAIT);
+ brw_set_dest(p, insn, src);
+ brw_set_src0(p, insn, src);
+ brw_set_src1(p, insn, brw_null_reg());
+
+ brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
+ brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
+}
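Taken together, SHADER_OPCODE_BARRIER (added in brw_defines.h above), the gateway SFID values, brw_barrier() and brw_WAIT() give the code generator what it needs to lower a GLSL barrier(): send a "barrier msg" to the message gateway, then stall on the notification register until every thread in the workgroup has arrived. A hedged sketch of the generator-side lowering (the actual call site is not in this excerpt):

    case SHADER_OPCODE_BARRIER:
       brw_barrier(p, src[0]);   /* SEND to the message gateway */
       brw_WAIT(p);              /* wait on n0 until the barrier releases */
       break;
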
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 5ce1dfc6633..2c0ff961182 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -49,6 +49,8 @@
#include "glsl/glsl_types.h"
#include "program/sampler.h"
+using namespace brw;
+
void
fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg *src, unsigned sources)
@@ -212,152 +214,13 @@ fs_inst::resize_sources(uint8_t num_sources)
}
}
-#define ALU1(op) \
- fs_inst * \
- fs_visitor::op(const fs_reg &dst, const fs_reg &src0) \
- { \
- return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0); \
- }
-
-#define ALU2(op) \
- fs_inst * \
- fs_visitor::op(const fs_reg &dst, const fs_reg &src0, \
- const fs_reg &src1) \
- { \
- return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1); \
- }
-
-#define ALU2_ACC(op) \
- fs_inst * \
- fs_visitor::op(const fs_reg &dst, const fs_reg &src0, \
- const fs_reg &src1) \
- { \
- fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);\
- inst->writes_accumulator = true; \
- return inst; \
- }
-
-#define ALU3(op) \
- fs_inst * \
- fs_visitor::op(const fs_reg &dst, const fs_reg &src0, \
- const fs_reg &src1, const fs_reg &src2) \
- { \
- return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\
- }
-
-ALU1(NOT)
-ALU1(MOV)
-ALU1(FRC)
-ALU1(RNDD)
-ALU1(RNDE)
-ALU1(RNDZ)
-ALU2(ADD)
-ALU2(MUL)
-ALU2_ACC(MACH)
-ALU2(AND)
-ALU2(OR)
-ALU2(XOR)
-ALU2(SHL)
-ALU2(SHR)
-ALU2(ASR)
-ALU3(LRP)
-ALU1(BFREV)
-ALU3(BFE)
-ALU2(BFI1)
-ALU3(BFI2)
-ALU1(FBH)
-ALU1(FBL)
-ALU1(CBIT)
-ALU3(MAD)
-ALU2_ACC(ADDC)
-ALU2_ACC(SUBB)
-ALU2(SEL)
-ALU2(MAC)
-
-/** Gen4 predicated IF. */
-fs_inst *
-fs_visitor::IF(enum brw_predicate predicate)
-{
- fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width);
- inst->predicate = predicate;
- return inst;
-}
-
-/** Gen6 IF with embedded comparison. */
-fs_inst *
-fs_visitor::IF(const fs_reg &src0, const fs_reg &src1,
- enum brw_conditional_mod condition)
-{
- assert(devinfo->gen == 6);
- fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width,
- reg_null_d, src0, src1);
- inst->conditional_mod = condition;
- return inst;
-}
-
-/**
- * CMP: Sets the low bit of the destination channels with the result
- * of the comparison, while the upper bits are undefined, and updates
- * the flag register with the packed 16 bits of the result.
- */
-fs_inst *
-fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1,
- enum brw_conditional_mod condition)
-{
- fs_inst *inst;
-
- /* Take the instruction:
- *
- * CMP null<d> src0<f> src1<f>
- *
- * Original gen4 does type conversion to the destination type before
- * comparison, producing garbage results for floating point comparisons.
- *
- * The destination type doesn't matter on newer generations, so we set the
- * type to match src0 so we can compact the instruction.
- */
- dst.type = src0.type;
- if (dst.file == HW_REG)
- dst.fixed_hw_reg.type = dst.type;
-
- resolve_ud_negate(&src0);
- resolve_ud_negate(&src1);
-
- inst = new(mem_ctx) fs_inst(BRW_OPCODE_CMP, dst, src0, src1);
- inst->conditional_mod = condition;
-
- return inst;
-}
-
-fs_inst *
-fs_visitor::LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources,
- int header_size)
-{
- assert(dst.width % 8 == 0);
- fs_inst *inst = new(mem_ctx) fs_inst(SHADER_OPCODE_LOAD_PAYLOAD, dst.width,
- dst, src, sources);
- inst->header_size = header_size;
-
- for (int i = 0; i < header_size; i++)
- assert(src[i].file != GRF || src[i].width * type_sz(src[i].type) == 32);
- inst->regs_written = header_size;
-
- for (int i = header_size; i < sources; ++i)
- assert(src[i].file != GRF || src[i].width == dst.width);
- inst->regs_written += (sources - header_size) * (dst.width / 8);
-
- return inst;
-}
-
-exec_list
-fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst,
+void
+fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
+ const fs_reg &dst,
const fs_reg &surf_index,
const fs_reg &varying_offset,
uint32_t const_offset)
{
- exec_list instructions;
- fs_inst *inst;
-
/* We have our constant surface use a pitch of 4 bytes, so our index can
* be any component of a vector, and then we load 4 contiguous
* components starting from that.
@@ -370,8 +233,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst,
* the redundant ones.
*/
fs_reg vec4_offset = vgrf(glsl_type::int_type);
- instructions.push_tail(ADD(vec4_offset,
- varying_offset, fs_reg(const_offset & ~3)));
+ bld.ADD(vec4_offset, varying_offset, fs_reg(const_offset & ~3));
int scale = 1;
if (devinfo->gen == 4 && dst.width == 8) {
@@ -393,9 +255,8 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst,
int regs_written = 4 * (dst.width / 8) * scale;
fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written),
dst.type, dst.width);
- inst = new(mem_ctx) fs_inst(op, vec4_result, surf_index, vec4_offset);
+ fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset);
inst->regs_written = regs_written;
- instructions.push_tail(inst);
if (devinfo->gen < 7) {
inst->base_mrf = 13;
@@ -406,30 +267,23 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst,
inst->mlen = 1 + dispatch_width / 8;
}
- fs_reg result = offset(vec4_result, (const_offset & 3) * scale);
- instructions.push_tail(MOV(dst, result));
-
- return instructions;
+ bld.MOV(dst, offset(vec4_result, (const_offset & 3) * scale));
}
/**
* A helper for MOV generation for fixing up broken hardware SEND dependency
* handling.
*/
-fs_inst *
-fs_visitor::DEP_RESOLVE_MOV(int grf)
+void
+fs_visitor::DEP_RESOLVE_MOV(const fs_builder &bld, int grf)
{
- fs_inst *inst = MOV(brw_null_reg(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F));
-
- inst->ir = NULL;
- inst->annotation = "send dependency resolve";
-
/* The caller always wants uncompressed to emit the minimal extra
* dependencies, and to avoid having to deal with aligning its regs to 2.
*/
- inst->exec_size = 8;
+ const fs_builder ubld = bld.annotate("send dependency resolve")
+ .half(0);
- return inst;
+ ubld.MOV(ubld.null_reg_f(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F));
}
bool
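With the builder-based variant the caller now chooses the insertion point through fs_builder::at() instead of inserting the returned instruction manually. The dependency-workaround hunks later in this patch use it as follows:

    if (needs_dep[i])
       DEP_RESOLVE_MOV(bld.at(block, inst), first_write_grf + i);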
@@ -685,7 +539,7 @@ fs_visitor::type_size(const struct glsl_type *type)
* the destination of the MOV, with extra parameters set.
*/
fs_reg
-fs_visitor::get_timestamp(fs_inst **out_mov)
+fs_visitor::get_timestamp(const fs_builder &bld)
{
assert(devinfo->gen >= 7);
@@ -696,11 +550,10 @@ fs_visitor::get_timestamp(fs_inst **out_mov)
fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 4);
- fs_inst *mov = MOV(dst, ts);
/* We want to read the 3 fields we care about even if it's not enabled in
* the dispatch.
*/
- mov->force_writemask_all = true;
+ bld.exec_all().MOV(dst, ts);
/* The caller wants the low 32 bits of the timestamp. Since it's running
* at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds,
@@ -714,105 +567,60 @@ fs_visitor::get_timestamp(fs_inst **out_mov)
*/
dst.set_smear(0);
- *out_mov = mov;
return dst;
}
void
fs_visitor::emit_shader_time_begin()
{
- current_annotation = "shader time start";
- fs_inst *mov;
- shader_start_time = get_timestamp(&mov);
- emit(mov);
+ shader_start_time = get_timestamp(bld.annotate("shader time start"));
}
void
fs_visitor::emit_shader_time_end()
{
- current_annotation = "shader time end";
-
- enum shader_time_shader_type type, written_type, reset_type;
- switch (stage) {
- case MESA_SHADER_VERTEX:
- type = ST_VS;
- written_type = ST_VS_WRITTEN;
- reset_type = ST_VS_RESET;
- break;
- case MESA_SHADER_GEOMETRY:
- type = ST_GS;
- written_type = ST_GS_WRITTEN;
- reset_type = ST_GS_RESET;
- break;
- case MESA_SHADER_FRAGMENT:
- if (dispatch_width == 8) {
- type = ST_FS8;
- written_type = ST_FS8_WRITTEN;
- reset_type = ST_FS8_RESET;
- } else {
- assert(dispatch_width == 16);
- type = ST_FS16;
- written_type = ST_FS16_WRITTEN;
- reset_type = ST_FS16_RESET;
- }
- break;
- case MESA_SHADER_COMPUTE:
- type = ST_CS;
- written_type = ST_CS_WRITTEN;
- reset_type = ST_CS_RESET;
- break;
- default:
- unreachable("fs_visitor::emit_shader_time_end missing code");
- }
-
/* Insert our code just before the final SEND with EOT. */
exec_node *end = this->instructions.get_tail();
assert(end && ((fs_inst *) end)->eot);
+ const fs_builder ibld = bld.annotate("shader time end")
+ .exec_all().at(NULL, end);
- fs_inst *tm_read;
- fs_reg shader_end_time = get_timestamp(&tm_read);
- end->insert_before(tm_read);
+ fs_reg shader_end_time = get_timestamp(ibld);
/* Check that there weren't any timestamp reset events (assuming these
* were the only two timestamp reads that happened).
*/
fs_reg reset = shader_end_time;
reset.set_smear(2);
- fs_inst *test = AND(reg_null_d, reset, fs_reg(1u));
- test->conditional_mod = BRW_CONDITIONAL_Z;
- test->force_writemask_all = true;
- end->insert_before(test);
- end->insert_before(IF(BRW_PREDICATE_NORMAL));
+ set_condmod(BRW_CONDITIONAL_Z,
+ ibld.AND(ibld.null_reg_ud(), reset, fs_reg(1u)));
+ ibld.IF(BRW_PREDICATE_NORMAL);
fs_reg start = shader_start_time;
start.negate = true;
fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1);
diff.set_smear(0);
- fs_inst *add = ADD(diff, start, shader_end_time);
- add->force_writemask_all = true;
- end->insert_before(add);
+ ibld.ADD(diff, start, shader_end_time);
/* If there were no instructions between the two timestamp gets, the diff
* is 2 cycles. Remove that overhead, so I can forget about that when
* trying to determine the time taken for single instructions.
*/
- add = ADD(diff, diff, fs_reg(-2u));
- add->force_writemask_all = true;
- end->insert_before(add);
-
- end->insert_before(SHADER_TIME_ADD(type, diff));
- end->insert_before(SHADER_TIME_ADD(written_type, fs_reg(1u)));
- end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ELSE, dispatch_width));
- end->insert_before(SHADER_TIME_ADD(reset_type, fs_reg(1u)));
- end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ENDIF, dispatch_width));
+ ibld.ADD(diff, diff, fs_reg(-2u));
+ SHADER_TIME_ADD(ibld, 0, diff);
+ SHADER_TIME_ADD(ibld, 1, fs_reg(1u));
+ ibld.emit(BRW_OPCODE_ELSE);
+ SHADER_TIME_ADD(ibld, 2, fs_reg(1u));
+ ibld.emit(BRW_OPCODE_ENDIF);
}
-fs_inst *
-fs_visitor::SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg value)
+void
+fs_visitor::SHADER_TIME_ADD(const fs_builder &bld,
+ int shader_time_subindex,
+ fs_reg value)
{
- int shader_time_index =
- brw_get_shader_time_index(brw, shader_prog, prog, type);
- fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE);
+ int index = shader_time_index * 3 + shader_time_subindex;
+ fs_reg offset = fs_reg(index * SHADER_TIME_STRIDE);
fs_reg payload;
if (dispatch_width == 8)
@@ -820,8 +628,7 @@ fs_visitor::SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg value)
else
payload = vgrf(glsl_type::uint_type);
- return new(mem_ctx) fs_inst(SHADER_OPCODE_SHADER_TIME_ADD,
- fs_reg(), payload, offset, value);
+ bld.emit(SHADER_OPCODE_SHADER_TIME_ADD, fs_reg(), payload, offset, value);
}
void
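The rewritten helper addresses the shader-time buffer as three consecutive slots per shader_time_index, matching the (type, written, reset) triple that the removed switch statement used to select explicitly: subindex 0 accumulates the cycle count, 1 counts invocations that recorded a time, and 2 counts timestamp resets. A small sketch of the resulting offset arithmetic, using the SHADER_TIME_STRIDE constant already referenced above:

    /* Byte offset of one shader-time slot; the subindex roles follow the
     * removed ST_*, ST_*_WRITTEN and ST_*_RESET enum values. */
    static unsigned
    shader_time_offset(int shader_time_index, int subindex)
    {
       return (shader_time_index * 3 + subindex) * SHADER_TIME_STRIDE;
    }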
@@ -864,65 +671,16 @@ fs_visitor::fail(const char *format, ...)
* During a SIMD16 compile (if one happens anyway), this just calls fail().
*/
void
-fs_visitor::no16(const char *format, ...)
+fs_visitor::no16(const char *msg)
{
- va_list va;
-
- va_start(va, format);
-
if (dispatch_width == 16) {
- vfail(format, va);
+ fail("%s", msg);
} else {
simd16_unsupported = true;
- if (brw->perf_debug) {
- if (no16_msg)
- ralloc_vasprintf_append(&no16_msg, format, va);
- else
- no16_msg = ralloc_vasprintf(mem_ctx, format, va);
- }
+ compiler->shader_perf_log(log_data,
+ "SIMD16 shader failed to compile: %s", msg);
}
-
- va_end(va);
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dispatch_width));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dst));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dst, src0));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1, const fs_reg &src2)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1, src2));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst,
- fs_reg src[], int sources)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dst, src, sources));
}
/**
@@ -1051,7 +809,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
return inst->mlen;
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
- return 2;
+ return inst->mlen;
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
@@ -1077,14 +835,6 @@ fs_visitor::vgrf(const glsl_type *const type)
brw_type_for_base_type(type), dispatch_width);
}
-fs_reg
-fs_visitor::vgrf(int num_components)
-{
- int reg_width = dispatch_width / 8;
- return fs_reg(GRF, alloc.allocate(num_components * reg_width),
- BRW_REGISTER_TYPE_F, dispatch_width);
-}
-
/** Fixed HW reg constructor. */
fs_reg::fs_reg(enum register_file file, int reg)
{
@@ -1130,117 +880,18 @@ fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type,
this->width = width;
}
-fs_reg *
-fs_visitor::variable_storage(ir_variable *var)
-{
- return (fs_reg *)hash_table_find(this->variable_ht, var);
-}
-
-void
-import_uniforms_callback(const void *key,
- void *data,
- void *closure)
-{
- struct hash_table *dst_ht = (struct hash_table *)closure;
- const fs_reg *reg = (const fs_reg *)data;
-
- if (reg->file != UNIFORM)
- return;
-
- hash_table_insert(dst_ht, data, key);
-}
-
/* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch.
* This brings in those uniform definitions
*/
void
fs_visitor::import_uniforms(fs_visitor *v)
{
- hash_table_call_foreach(v->variable_ht,
- import_uniforms_callback,
- variable_ht);
this->push_constant_loc = v->push_constant_loc;
this->pull_constant_loc = v->pull_constant_loc;
this->uniforms = v->uniforms;
this->param_size = v->param_size;
}
-/* Our support for uniforms is piggy-backed on the struct
- * gl_fragment_program, because that's where the values actually
- * get stored, rather than in some global gl_shader_program uniform
- * store.
- */
-void
-fs_visitor::setup_uniform_values(ir_variable *ir)
-{
- int namelen = strlen(ir->name);
-
- /* The data for our (non-builtin) uniforms is stored in a series of
- * gl_uniform_driver_storage structs for each subcomponent that
- * glGetUniformLocation() could name. We know it's been set up in the same
- * order we'd walk the type, so walk the list of storage and find anything
- * with our name, or the prefix of a component that starts with our name.
- */
- unsigned params_before = uniforms;
- for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
- struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
-
- if (strncmp(ir->name, storage->name, namelen) != 0 ||
- (storage->name[namelen] != 0 &&
- storage->name[namelen] != '.' &&
- storage->name[namelen] != '[')) {
- continue;
- }
-
- unsigned slots = storage->type->component_slots();
- if (storage->array_elements)
- slots *= storage->array_elements;
-
- for (unsigned i = 0; i < slots; i++) {
- stage_prog_data->param[uniforms++] = &storage->storage[i];
- }
- }
-
- /* Make sure we actually initialized the right amount of stuff here. */
- assert(params_before + ir->type->component_slots() == uniforms);
- (void)params_before;
-}
-
-
-/* Our support for builtin uniforms is even scarier than non-builtin.
- * It sits on top of the PROG_STATE_VAR parameters that are
- * automatically updated from GL context state.
- */
-void
-fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
-{
- const ir_state_slot *const slots = ir->get_state_slots();
- assert(slots != NULL);
-
- for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
- /* This state reference has already been setup by ir_to_mesa, but we'll
- * get the same index back here.
- */
- int index = _mesa_add_state_reference(this->prog->Parameters,
- (gl_state_index *)slots[i].tokens);
-
- /* Add each of the unique swizzles of the element as a parameter.
- * This'll end up matching the expected layout of the
- * array/matrix/structure we're trying to fill in.
- */
- int last_swiz = -1;
- for (unsigned int j = 0; j < 4; j++) {
- int swiz = GET_SWZ(slots[i].swizzle, j);
- if (swiz == last_swiz)
- break;
- last_swiz = swiz;
-
- stage_prog_data->param[uniforms++] =
- &prog->Parameters->ParameterValues[index][swiz];
- }
- }
-}
-
fs_reg *
fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
bool origin_upper_left)
@@ -1253,15 +904,15 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
/* gl_FragCoord.x */
if (pixel_center_integer) {
- emit(MOV(wpos, this->pixel_x));
+ bld.MOV(wpos, this->pixel_x);
} else {
- emit(ADD(wpos, this->pixel_x, fs_reg(0.5f)));
+ bld.ADD(wpos, this->pixel_x, fs_reg(0.5f));
}
wpos = offset(wpos, 1);
/* gl_FragCoord.y */
if (!flip && pixel_center_integer) {
- emit(MOV(wpos, this->pixel_y));
+ bld.MOV(wpos, this->pixel_y);
} else {
fs_reg pixel_y = this->pixel_y;
float offset = (pixel_center_integer ? 0.0 : 0.5);
@@ -1271,22 +922,22 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
offset += key->drawable_height - 1.0;
}
- emit(ADD(wpos, pixel_y, fs_reg(offset)));
+ bld.ADD(wpos, pixel_y, fs_reg(offset));
}
wpos = offset(wpos, 1);
/* gl_FragCoord.z */
if (devinfo->gen >= 6) {
- emit(MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
+ bld.MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)));
} else {
- emit(FS_OPCODE_LINTERP, wpos,
+ bld.emit(FS_OPCODE_LINTERP, wpos,
this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
interp_reg(VARYING_SLOT_POS, 2));
}
wpos = offset(wpos, 1);
/* gl_FragCoord.w: Already set up in emit_interpolation */
- emit(BRW_OPCODE_MOV, wpos, this->wpos_w);
+ bld.MOV(wpos, this->wpos_w);
return reg;
}
@@ -1321,8 +972,8 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp,
*/
barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
}
- return emit(FS_OPCODE_LINTERP, attr,
- this->delta_xy[barycoord_mode], interp);
+ return bld.emit(FS_OPCODE_LINTERP, attr,
+ this->delta_xy[barycoord_mode], interp);
}
void
@@ -1380,7 +1031,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name,
struct brw_reg interp = interp_reg(location, k);
interp = suboffset(interp, 3);
interp.type = attr.type;
- emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
+ bld.emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
attr = offset(attr, 1);
}
} else {
@@ -1393,7 +1044,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name,
* unlit, replace the centroid data with non-centroid
* data.
*/
- emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+ bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
fs_inst *inst;
inst = emit_linterp(attr, fs_reg(interp), interpolation_mode,
@@ -1417,7 +1068,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name,
mod_sample || key->persample_shading);
}
if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) {
- emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
+ bld.MUL(attr, attr, this->pixel_w);
}
attr = offset(attr, 1);
}
@@ -1448,7 +1099,7 @@ fs_visitor::emit_frontfacing_interpolation()
fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W));
g0.negate = true;
- emit(ASR(*reg, g0, fs_reg(15)));
+ bld.ASR(*reg, g0, fs_reg(15));
} else {
/* Bit 31 of g1.6 is 0 if the polygon is front facing. We want to create
* a boolean result from this (1/true or 0/false).
@@ -1463,7 +1114,7 @@ fs_visitor::emit_frontfacing_interpolation()
fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));
g1_6.negate = true;
- emit(ASR(*reg, g1_6, fs_reg(31)));
+ bld.ASR(*reg, g1_6, fs_reg(31));
}
return reg;
@@ -1478,9 +1129,9 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos)
if (key->compute_pos_offset) {
/* Convert int_sample_pos to floating point */
- emit(MOV(dst, int_sample_pos));
+ bld.MOV(dst, int_sample_pos);
/* Scale to the range [0, 1] */
- emit(MUL(dst, dst, fs_reg(1 / 16.0f)));
+ bld.MUL(dst, dst, fs_reg(1 / 16.0f));
}
else {
/* From ARB_sample_shading specification:
@@ -1488,7 +1139,7 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos)
* rasterization is disabled, gl_SamplePosition will always be
* (0.5, 0.5).
*/
- emit(MOV(dst, fs_reg(0.5f)));
+ bld.MOV(dst, fs_reg(0.5f));
}
}
@@ -1497,7 +1148,7 @@ fs_visitor::emit_samplepos_setup()
{
assert(devinfo->gen >= 6);
- this->current_annotation = "compute sample position";
+ const fs_builder abld = bld.annotate("compute sample position");
fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::vec2_type));
fs_reg pos = *reg;
fs_reg int_sample_x = vgrf(glsl_type::int_type);
@@ -1519,22 +1170,22 @@ fs_visitor::emit_samplepos_setup()
BRW_REGISTER_TYPE_B), 16, 8, 2);
if (dispatch_width == 8) {
- emit(MOV(int_sample_x, fs_reg(sample_pos_reg)));
+ abld.MOV(int_sample_x, fs_reg(sample_pos_reg));
} else {
- emit(MOV(half(int_sample_x, 0), fs_reg(sample_pos_reg)));
- emit(MOV(half(int_sample_x, 1), fs_reg(suboffset(sample_pos_reg, 16))))
- ->force_sechalf = true;
+ abld.half(0).MOV(half(int_sample_x, 0), fs_reg(sample_pos_reg));
+ abld.half(1).MOV(half(int_sample_x, 1),
+ fs_reg(suboffset(sample_pos_reg, 16)));
}
/* Compute gl_SamplePosition.x */
compute_sample_position(pos, int_sample_x);
pos = offset(pos, 1);
if (dispatch_width == 8) {
- emit(MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1))));
+ abld.MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1)));
} else {
- emit(MOV(half(int_sample_y, 0),
- fs_reg(suboffset(sample_pos_reg, 1))));
- emit(MOV(half(int_sample_y, 1), fs_reg(suboffset(sample_pos_reg, 17))))
- ->force_sechalf = true;
+ abld.half(0).MOV(half(int_sample_y, 0),
+ fs_reg(suboffset(sample_pos_reg, 1)));
+ abld.half(1).MOV(half(int_sample_y, 1),
+ fs_reg(suboffset(sample_pos_reg, 17)));
}
/* Compute gl_SamplePosition.y */
compute_sample_position(pos, int_sample_y);
@@ -1548,7 +1199,7 @@ fs_visitor::emit_sampleid_setup()
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
assert(devinfo->gen >= 6);
- this->current_annotation = "compute sample id";
+ const fs_builder abld = bld.annotate("compute sample id");
fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type));
if (key->compute_sample_id) {
@@ -1575,26 +1226,25 @@ fs_visitor::emit_sampleid_setup()
* are sample 1 of subspan 0; the third group is sample 0 of
* subspan 1, and finally sample 1 of subspan 1.
*/
- fs_inst *inst;
- inst = emit(BRW_OPCODE_AND, t1,
- fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)),
- fs_reg(0xc0));
- inst->force_writemask_all = true;
- inst = emit(BRW_OPCODE_SHR, t1, t1, fs_reg(5));
- inst->force_writemask_all = true;
+ abld.exec_all()
+ .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)),
+ fs_reg(0xc0));
+ abld.exec_all().SHR(t1, t1, fs_reg(5));
+
/* This works for both SIMD8 and SIMD16 */
- inst = emit(MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210)));
- inst->force_writemask_all = true;
+ abld.exec_all()
+ .MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210));
+
/* This special instruction takes care of setting vstride=1,
* width=4, hstride=0 of t2 during an ADD instruction.
*/
- emit(FS_OPCODE_SET_SAMPLE_ID, *reg, t1, t2);
+ abld.emit(FS_OPCODE_SET_SAMPLE_ID, *reg, t1, t2);
} else {
/* As per GL_ARB_sample_shading specification:
* "When rendering to a non-multisample buffer, or if multisample
* rasterization is disabled, gl_SampleID will always be zero."
*/
- emit(BRW_OPCODE_MOV, *reg, fs_reg(0));
+ abld.MOV(*reg, fs_reg(0));
}
return reg;
@@ -1606,111 +1256,11 @@ fs_visitor::resolve_source_modifiers(fs_reg *src)
if (!src->abs && !src->negate)
return;
- fs_reg temp = retype(vgrf(1), src->type);
- emit(MOV(temp, *src));
+ fs_reg temp = bld.vgrf(src->type);
+ bld.MOV(temp, *src);
*src = temp;
}
-fs_reg
-fs_visitor::fix_math_operand(fs_reg src)
-{
- /* Can't do hstride == 0 args on gen6 math, so expand it out. We
- * might be able to do better by doing execsize = 1 math and then
- * expanding that result out, but we would need to be careful with
- * masking.
- *
- * The hardware ignores source modifiers (negate and abs) on math
- * instructions, so we also move to a temp to set those up.
- */
- if (devinfo->gen == 6 && src.file != UNIFORM && src.file != IMM &&
- !src.abs && !src.negate)
- return src;
-
- /* Gen7 relaxes most of the above restrictions, but still can't use IMM
- * operands to math
- */
- if (devinfo->gen >= 7 && src.file != IMM)
- return src;
-
- fs_reg expanded = vgrf(glsl_type::float_type);
- expanded.type = src.type;
- emit(BRW_OPCODE_MOV, expanded, src);
- return expanded;
-}
-
-fs_inst *
-fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
-{
- switch (opcode) {
- case SHADER_OPCODE_RCP:
- case SHADER_OPCODE_RSQ:
- case SHADER_OPCODE_SQRT:
- case SHADER_OPCODE_EXP2:
- case SHADER_OPCODE_LOG2:
- case SHADER_OPCODE_SIN:
- case SHADER_OPCODE_COS:
- break;
- default:
- unreachable("not reached: bad math opcode");
- }
-
- /* Can't do hstride == 0 args to gen6 math, so expand it out. We
- * might be able to do better by doing execsize = 1 math and then
- * expanding that result out, but we would need to be careful with
- * masking.
- *
- * Gen 6 hardware ignores source modifiers (negate and abs) on math
- * instructions, so we also move to a temp to set those up.
- */
- if (devinfo->gen == 6 || devinfo->gen == 7)
- src = fix_math_operand(src);
-
- fs_inst *inst = emit(opcode, dst, src);
-
- if (devinfo->gen < 6) {
- inst->base_mrf = 2;
- inst->mlen = dispatch_width / 8;
- }
-
- return inst;
-}
-
-fs_inst *
-fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
-{
- int base_mrf = 2;
- fs_inst *inst;
-
- if (devinfo->gen >= 8) {
- inst = emit(opcode, dst, src0, src1);
- } else if (devinfo->gen >= 6) {
- src0 = fix_math_operand(src0);
- src1 = fix_math_operand(src1);
-
- inst = emit(opcode, dst, src0, src1);
- } else {
- /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
- * "Message Payload":
- *
- * "Operand0[7]. For the INT DIV functions, this operand is the
- * denominator."
- * ...
- * "Operand1[7]. For the INT DIV functions, this operand is the
- * numerator."
- */
- bool is_int_div = opcode != SHADER_OPCODE_POW;
- fs_reg &op0 = is_int_div ? src1 : src0;
- fs_reg &op1 = is_int_div ? src0 : src1;
-
- emit(MOV(fs_reg(MRF, base_mrf + 1, op1.type, dispatch_width), op1));
- inst = emit(opcode, dst, op0, reg_null_f);
-
- inst->base_mrf = base_mrf;
- inst->mlen = 2 * dispatch_width / 8;
- }
- return inst;
-}
-
void
fs_visitor::emit_discard_jump()
{
@@ -1719,7 +1269,7 @@ fs_visitor::emit_discard_jump()
/* For performance, after a discard, jump to the end of the
* shader if all relevant channels have been discarded.
*/
- fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
+ fs_inst *discard_jump = bld.emit(FS_OPCODE_DISCARD_JUMP);
discard_jump->flag_subreg = 1;
discard_jump->predicate = (dispatch_width == 8)
@@ -2317,26 +1867,22 @@ fs_visitor::demote_pull_constants()
continue;
/* Set up the annotation tracking for new generated instructions. */
- base_ir = inst->ir;
- current_annotation = inst->annotation;
-
+ const fs_builder ibld = bld.annotate(inst->annotation, inst->ir)
+ .at(block, inst);
fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start);
fs_reg dst = vgrf(glsl_type::float_type);
/* Generate a pull load into dst. */
if (inst->src[i].reladdr) {
- exec_list list = VARYING_PULL_CONSTANT_LOAD(dst,
- surf_index,
- *inst->src[i].reladdr,
- pull_index);
- inst->insert_before(block, &list);
+ VARYING_PULL_CONSTANT_LOAD(ibld, dst,
+ surf_index,
+ *inst->src[i].reladdr,
+ pull_index);
inst->src[i].reladdr = NULL;
} else {
fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
- fs_inst *pull =
- new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8,
- dst, surf_index, offset);
- inst->insert_before(block, pull);
+ ibld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ dst, surf_index, offset);
inst->src[i].set_smear(pull_index & 3);
}
@@ -2663,6 +2209,16 @@ fs_visitor::opt_sampler_eot()
if (unlikely(tex_inst->is_head_sentinel()) || !tex_inst->is_tex())
return false;
+ /* This optimisation doesn't seem to work for textureGather for some
+ * reason. I can't find any documentation or known workarounds to indicate
+ * that this is expected, but considering that it is probably pretty
+ * unlikely that a shader would directly write out the results from
+ * textureGather, we might as well just disable it.
+ */
+ if (tex_inst->opcode == SHADER_OPCODE_TG4 ||
+ tex_inst->opcode == SHADER_OPCODE_TG4_OFFSET)
+ return false;
+
/* If there's no header present, we need to munge the LOAD_PAYLOAD as well.
* It's very likely to be the previous instruction.
*/
@@ -2676,7 +2232,7 @@ fs_visitor::opt_sampler_eot()
tex_inst->offset |= fb_write->target << 24;
tex_inst->eot = true;
- tex_inst->dst = reg_null_ud;
+ tex_inst->dst = bld.null_reg_ud();
fb_write->remove(cfg->blocks[cfg->num_blocks - 1]);
/* If a header is present, marking the eot is sufficient. Otherwise, we need
@@ -2688,7 +2244,8 @@ fs_visitor::opt_sampler_eot()
if (tex_inst->header_size != 0)
return true;
- fs_reg send_header = vgrf(load_payload->sources + 1);
+ fs_reg send_header = bld.vgrf(BRW_REGISTER_TYPE_F,
+ load_payload->sources + 1);
fs_reg *new_sources =
ralloc_array(mem_ctx, fs_reg, load_payload->sources + 1);
@@ -3041,8 +2598,8 @@ fs_visitor::emit_repclear_shader()
fs_inst *mov;
if (uniforms == 1) {
- mov = emit(MOV(vec4(brw_message_reg(color_mrf)),
- fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)));
+ mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
+ fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
} else {
struct brw_reg reg =
brw_reg(BRW_GENERAL_REGISTER_FILE,
@@ -3051,14 +2608,13 @@ fs_visitor::emit_repclear_shader()
BRW_WIDTH_2,
BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
- mov = emit(MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg)));
+ mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
+ fs_reg(reg));
}
- mov->force_writemask_all = true;
-
fs_inst *write;
if (key->nr_color_regions == 1) {
- write = emit(FS_OPCODE_REP_FB_WRITE);
+ write = bld.emit(FS_OPCODE_REP_FB_WRITE);
write->saturate = key->clamp_fragment_color;
write->base_mrf = color_mrf;
write->target = 0;
@@ -3067,7 +2623,7 @@ fs_visitor::emit_repclear_shader()
} else {
assume(key->nr_color_regions > 0);
for (int i = 0; i < key->nr_color_regions; ++i) {
- write = emit(FS_OPCODE_REP_FB_WRITE);
+ write = bld.emit(FS_OPCODE_REP_FB_WRITE);
write->saturate = key->clamp_fragment_color;
write->base_mrf = base_mrf;
write->target = i;
@@ -3223,9 +2779,8 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
*/
if (block->start() == scan_inst) {
for (int i = 0; i < write_len; i++) {
- if (needs_dep[i]) {
- inst->insert_before(block, DEP_RESOLVE_MOV(first_write_grf + i));
- }
+ if (needs_dep[i])
+ DEP_RESOLVE_MOV(bld.at(block, inst), first_write_grf + i);
}
return;
}
@@ -3241,7 +2796,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
if (reg >= first_write_grf &&
reg < first_write_grf + write_len &&
needs_dep[reg - first_write_grf]) {
- inst->insert_before(block, DEP_RESOLVE_MOV(reg));
+ DEP_RESOLVE_MOV(bld.at(block, inst), reg);
needs_dep[reg - first_write_grf] = false;
if (scan_inst->exec_size == 16)
needs_dep[reg - first_write_grf + 1] = false;
@@ -3288,8 +2843,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
if (block->end() == scan_inst) {
for (int i = 0; i < write_len; i++) {
if (needs_dep[i])
- scan_inst->insert_before(block,
- DEP_RESOLVE_MOV(first_write_grf + i));
+ DEP_RESOLVE_MOV(bld.at(block, scan_inst), first_write_grf + i);
}
return;
}
@@ -3304,7 +2858,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
scan_inst->dst.reg >= first_write_grf &&
scan_inst->dst.reg < first_write_grf + write_len &&
needs_dep[scan_inst->dst.reg - first_write_grf]) {
- scan_inst->insert_before(block, DEP_RESOLVE_MOV(scan_inst->dst.reg));
+ DEP_RESOLVE_MOV(bld.at(block, scan_inst), scan_inst->dst.reg);
needs_dep[scan_inst->dst.reg - first_write_grf] = false;
}
@@ -3429,6 +2983,9 @@ fs_visitor::lower_load_payload()
assert(inst->dst.file == MRF || inst->dst.file == GRF);
assert(inst->saturate == false);
+ const fs_builder ibld = bld.group(inst->exec_size, inst->force_sechalf)
+ .exec_all(inst->force_writemask_all)
+ .at(block, inst);
fs_reg dst = inst->dst;
/* Get rid of COMPR4. We'll add it back in if we need it */
@@ -3441,9 +2998,7 @@ fs_visitor::lower_load_payload()
fs_reg mov_dst = retype(dst, BRW_REGISTER_TYPE_UD);
fs_reg mov_src = retype(inst->src[i], BRW_REGISTER_TYPE_UD);
mov_src.width = 8;
- fs_inst *mov = MOV(mov_dst, mov_src);
- mov->force_writemask_all = true;
- inst->insert_before(block, mov);
+ ibld.exec_all().MOV(mov_dst, mov_src);
}
dst = offset(dst, 1);
}
@@ -3474,23 +3029,13 @@ fs_visitor::lower_load_payload()
if (devinfo->has_compr4) {
fs_reg compr4_dst = retype(dst, inst->src[i].type);
compr4_dst.reg |= BRW_MRF_COMPR4;
-
- fs_inst *mov = MOV(compr4_dst, inst->src[i]);
- mov->force_writemask_all = inst->force_writemask_all;
- inst->insert_before(block, mov);
+ ibld.MOV(compr4_dst, inst->src[i]);
} else {
/* Platform doesn't have COMPR4. We have to fake it */
fs_reg mov_dst = retype(dst, inst->src[i].type);
mov_dst.width = 8;
-
- fs_inst *mov = MOV(mov_dst, half(inst->src[i], 0));
- mov->force_writemask_all = inst->force_writemask_all;
- inst->insert_before(block, mov);
-
- mov = MOV(offset(mov_dst, 4), half(inst->src[i], 1));
- mov->force_writemask_all = inst->force_writemask_all;
- mov->force_sechalf = true;
- inst->insert_before(block, mov);
+ ibld.half(0).MOV(mov_dst, half(inst->src[i], 0));
+ ibld.half(1).MOV(offset(mov_dst, 4), half(inst->src[i], 1));
}
}
@@ -3513,12 +3058,8 @@ fs_visitor::lower_load_payload()
}
for (uint8_t i = inst->header_size; i < inst->sources; i++) {
- if (inst->src[i].file != BAD_FILE) {
- fs_inst *mov = MOV(retype(dst, inst->src[i].type),
- inst->src[i]);
- mov->force_writemask_all = inst->force_writemask_all;
- inst->insert_before(block, mov);
- }
+ if (inst->src[i].file != BAD_FILE)
+ ibld.MOV(retype(dst, inst->src[i].type), inst->src[i]);
dst = offset(dst, 1);
}
@@ -3532,6 +3073,172 @@ fs_visitor::lower_load_payload()
return progress;
}
+bool
+fs_visitor::lower_integer_multiplication()
+{
+ bool progress = false;
+
+ /* Gen8's MUL instruction can do a 32-bit x 32-bit -> 32-bit operation
+ * directly, but Cherryview cannot.
+ */
+ if (devinfo->gen >= 8 && !devinfo->is_cherryview)
+ return false;
+
+ foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+ if (inst->opcode != BRW_OPCODE_MUL ||
+ inst->dst.is_accumulator() ||
+ (inst->dst.type != BRW_REGISTER_TYPE_D &&
+ inst->dst.type != BRW_REGISTER_TYPE_UD))
+ continue;
+
+ const fs_builder ibld = bld.at(block, inst);
+
+ /* The MUL instruction isn't commutative. On Gen <= 6, only the low
+ * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
+ * src1 are used.
+ *
+ * If multiplying by an immediate value that fits in 16-bits, do a
+ * single MUL instruction with that value in the proper location.
+ */
+ if (inst->src[1].file == IMM &&
+ inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) {
+ if (devinfo->gen < 7) {
+ fs_reg imm(GRF, alloc.allocate(dispatch_width / 8),
+ inst->dst.type, dispatch_width);
+ ibld.MOV(imm, inst->src[1]);
+ ibld.MUL(inst->dst, imm, inst->src[0]);
+ } else {
+ ibld.MUL(inst->dst, inst->src[0], inst->src[1]);
+ }
+ } else {
+ /* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot
+ * do 32-bit integer multiplication in one instruction, but instead
+ * must do a sequence (which actually calculates a 64-bit result):
+ *
+ * mul(8) acc0<1>D g3<8,8,1>D g4<8,8,1>D
+ * mach(8) null g3<8,8,1>D g4<8,8,1>D
+ * mov(8) g2<1>D acc0<8,8,1>D
+ *
+ * But on Gen > 6, the ability to use the second accumulator register
+ * (acc1) for non-float data types was removed, preventing a simple
+ * implementation in SIMD16. A 16-channel result can be calculated by
+ * executing the three instructions twice in SIMD8, once with quarter
+ * control of 1Q for the first eight channels and again with 2Q for
+ * the second eight channels.
+ *
+ * Which accumulator register is implicitly accessed (by AccWrEnable
+ * for instance) is determined by the quarter control. Unfortunately
+ * Ivybridge (and presumably Baytrail) has a hardware bug in which an
+ * implicit accumulator access by an instruction with 2Q will access
+ * acc1 regardless of whether the data type is usable in acc1.
+ *
+ * Specifically, the 2Q mach(8) writes acc1 which does not exist for
+ * integer data types.
+ *
+ * Since we only want the low 32-bits of the result, we can do two
+ * 32-bit x 16-bit multiplies (like the mul and mach are doing), and
+ * adjust the high result and add them (like the mach is doing):
+ *
+ * mul(8) g7<1>D g3<8,8,1>D g4.0<8,8,1>UW
+ * mul(8) g8<1>D g3<8,8,1>D g4.1<8,8,1>UW
+ * shl(8) g9<1>D g8<8,8,1>D 16D
+ * add(8) g2<1>D g7<8,8,1>D g8<8,8,1>D
+ *
+ * We avoid the shl instruction by realizing that we only want to add
+ * the low 16-bits of the "high" result to the high 16-bits of the
+ * "low" result and using proper regioning on the add:
+ *
+ * mul(8) g7<1>D g3<8,8,1>D g4.0<16,8,2>UW
+ * mul(8) g8<1>D g3<8,8,1>D g4.1<16,8,2>UW
+ * add(8) g7.1<2>UW g7.1<16,8,2>UW g8<16,8,2>UW
+ *
+ * Since it does not use the (single) accumulator register, we can
+ * schedule multi-component multiplications much better.
+ */
+
+ if (inst->conditional_mod && inst->dst.is_null()) {
+ inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
+ inst->dst.type, dispatch_width);
+ }
+ fs_reg low = inst->dst;
+ fs_reg high(GRF, alloc.allocate(dispatch_width / 8),
+ inst->dst.type, dispatch_width);
+
+ if (devinfo->gen >= 7) {
+ fs_reg src1_0_w = inst->src[1];
+ fs_reg src1_1_w = inst->src[1];
+
+ if (inst->src[1].file == IMM) {
+ src1_0_w.fixed_hw_reg.dw1.ud &= 0xffff;
+ src1_1_w.fixed_hw_reg.dw1.ud >>= 16;
+ } else {
+ src1_0_w.type = BRW_REGISTER_TYPE_UW;
+ if (src1_0_w.stride != 0) {
+ assert(src1_0_w.stride == 1);
+ src1_0_w.stride = 2;
+ }
+
+ src1_1_w.type = BRW_REGISTER_TYPE_UW;
+ if (src1_1_w.stride != 0) {
+ assert(src1_1_w.stride == 1);
+ src1_1_w.stride = 2;
+ }
+ src1_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);
+ }
+ ibld.MUL(low, inst->src[0], src1_0_w);
+ ibld.MUL(high, inst->src[0], src1_1_w);
+ } else {
+ fs_reg src0_0_w = inst->src[0];
+ fs_reg src0_1_w = inst->src[0];
+
+ src0_0_w.type = BRW_REGISTER_TYPE_UW;
+ if (src0_0_w.stride != 0) {
+ assert(src0_0_w.stride == 1);
+ src0_0_w.stride = 2;
+ }
+
+ src0_1_w.type = BRW_REGISTER_TYPE_UW;
+ if (src0_1_w.stride != 0) {
+ assert(src0_1_w.stride == 1);
+ src0_1_w.stride = 2;
+ }
+ src0_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);
+
+ ibld.MUL(low, src0_0_w, inst->src[1]);
+ ibld.MUL(high, src0_1_w, inst->src[1]);
+ }
+
+ fs_reg dst = inst->dst;
+ dst.type = BRW_REGISTER_TYPE_UW;
+ dst.subreg_offset = 2;
+ dst.stride = 2;
+
+ high.type = BRW_REGISTER_TYPE_UW;
+ high.stride = 2;
+
+ low.type = BRW_REGISTER_TYPE_UW;
+ low.subreg_offset = 2;
+ low.stride = 2;
+
+ ibld.ADD(dst, low, high);
+
+ if (inst->conditional_mod) {
+ fs_reg null(retype(ibld.null_reg_f(), inst->dst.type));
+ set_condmod(inst->conditional_mod,
+ ibld.MOV(null, inst->dst));
+ }
+ }
+
+ inst->remove(block);
+ progress = true;
+ }
+
+ if (progress)
+ invalidate_live_intervals();
+
+ return progress;
+}
+
void
fs_visitor::dump_instructions()
{
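The regioning trick described in the comment above reduces to a simple identity: because only the low 32 bits of the product are kept, the 32x32 multiply can be split into two 32x16 multiplies, and only the low 16 bits of the "high" partial product survive, shifted into the upper half of the result. A scalar C sketch of the same computation, with the corresponding instructions from the comment noted alongside:

    #include <stdint.h>

    /* low32(a * b) == (a * lo16(b)) + ((a * hi16(b)) << 16)  (mod 2^32).
     * The ADD with <2>UW regioning in the pass performs the "<< 16" part
     * implicitly by writing only the high word of each dword of the result. */
    static uint32_t
    mul32_via_16bit_halves(uint32_t a, uint32_t b)
    {
       uint32_t low  = a * (b & 0xffff);   /* mul(8) g7<1>D g3<8,8,1>D g4.0<16,8,2>UW */
       uint32_t high = a * (b >> 16);      /* mul(8) g8<1>D g3<8,8,1>D g4.1<16,8,2>UW */
       return low + (high << 16);          /* add(8) g7.1<2>UW g7.1<16,8,2>UW g8<16,8,2>UW */
    }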
@@ -3602,6 +3309,9 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
}
fprintf(file, "(%d) ", inst->exec_size);
+ if (inst->mlen) {
+ fprintf(file, "(mlen: %d) ", inst->mlen);
+ }
switch (inst->dst.file) {
case GRF:
@@ -3895,7 +3605,7 @@ fs_visitor::setup_vs_payload()
void
fs_visitor::setup_cs_payload()
{
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
payload.num_regs = 1;
}
@@ -3938,6 +3648,17 @@ fs_visitor::calculate_register_pressure()
void
fs_visitor::optimize()
{
+ /* bld is the common builder object we used to translate the program into i965
+ * IR; it points at the end of the program. For the optimization and lowering
+ * passes coming next, any code added after the end of the program without
+ * having explicitly called fs_builder::at() clearly points at a mistake.
+ * Ideally optimization passes wouldn't be part of the visitor so they
+ * wouldn't have access to bld at all, but they do, so just in case some
+ * pass forgets to ask for a location explicitly, set it to NULL here to
+ * make it trip.
+ */
+ bld = bld.at(NULL, NULL);
+
split_virtual_grfs();
move_uniform_array_access_to_pull_constants();
@@ -3953,7 +3674,7 @@ fs_visitor::optimize()
snprintf(filename, 64, "%s%d-%04d-%02d-%02d-" #pass, \
stage_abbrev, dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
\
- backend_visitor::dump_instructions(filename); \
+ backend_shader::dump_instructions(filename); \
} \
\
progress = progress || this_progress; \
@@ -3966,7 +3687,7 @@ fs_visitor::optimize()
stage_abbrev, dispatch_width,
shader_prog ? shader_prog->Name : 0);
- backend_visitor::dump_instructions(filename);
+ backend_shader::dump_instructions(filename);
}
bool progress;
@@ -4010,6 +3731,7 @@ fs_visitor::optimize()
}
OPT(opt_combine_constants);
+ OPT(lower_integer_multiplication);
lower_uniform_pull_constant_loads();
}
@@ -4066,9 +3788,11 @@ fs_visitor::allocate_registers()
fail("Failure to register allocate. Reduce number of "
"live scalar values to avoid this.");
} else {
- perf_debug("%s shader triggered register spilling. "
- "Try reducing the number of live scalar values to "
- "improve performance.\n", stage_name);
+ compiler->shader_perf_log(log_data,
+ "%s shader triggered register spilling. "
+ "Try reducing the number of live scalar "
+ "values to improve performance.\n",
+ stage_name);
}
/* Since we're out of heuristics, just go spill registers until we
@@ -4097,7 +3821,7 @@ fs_visitor::allocate_registers()
}
bool
-fs_visitor::run_vs()
+fs_visitor::run_vs(gl_clip_plane *clip_planes)
{
assert(stage == MESA_SHADER_VERTEX);
@@ -4105,26 +3829,17 @@ fs_visitor::run_vs()
assign_common_binding_table_offsets(0);
setup_vs_payload();
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ if (shader_time_index >= 0)
emit_shader_time_begin();
- if (brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) {
- emit_nir_code();
- } else {
- foreach_in_list(ir_instruction, ir, shader->base.ir) {
- base_ir = ir;
- this->result = reg_undef;
- ir->accept(this);
- }
- base_ir = NULL;
- }
+ emit_nir_code();
if (failed)
return false;
- emit_urb_writes();
+ emit_urb_writes(clip_planes);
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ if (shader_time_index >= 0)
emit_shader_time_end();
calculate_cfg();
@@ -4141,7 +3856,7 @@ fs_visitor::run_vs()
}
bool
-fs_visitor::run_fs()
+fs_visitor::run_fs(bool do_rep_send)
{
brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data;
brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key;
@@ -4160,10 +3875,11 @@ fs_visitor::run_fs()
if (0) {
emit_dummy_fs();
- } else if (brw->use_rep_send && dispatch_width == 16) {
+ } else if (do_rep_send) {
+ assert(dispatch_width == 16);
emit_repclear_shader();
} else {
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ if (shader_time_index >= 0)
emit_shader_time_begin();
calculate_urb_setup();
@@ -4178,37 +3894,27 @@ fs_visitor::run_fs()
* Initialize it with the dispatched pixels.
*/
if (wm_prog_data->uses_kill) {
- fs_inst *discard_init = emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+ fs_inst *discard_init = bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
discard_init->flag_subreg = 1;
}
/* Generate FS IR for main(). (the visitor only descends into
* functions called "main").
*/
- if (brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) {
- emit_nir_code();
- } else if (shader) {
- foreach_in_list(ir_instruction, ir, shader->base.ir) {
- base_ir = ir;
- this->result = reg_undef;
- ir->accept(this);
- }
- } else {
- emit_fragment_program_code();
- }
- base_ir = NULL;
+ emit_nir_code();
+
if (failed)
return false;
if (wm_prog_data->uses_kill)
- emit(FS_OPCODE_PLACEHOLDER_HALT);
+ bld.emit(FS_OPCODE_PLACEHOLDER_HALT);
if (wm_key->alpha_test_func)
emit_alpha_test();
emit_fb_writes();
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ if (shader_time_index >= 0)
emit_shader_time_end();
calculate_cfg();
@@ -4252,7 +3958,7 @@ fs_visitor::run_cs()
setup_cs_payload();
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ if (shader_time_index >= 0)
emit_shader_time_begin();
emit_nir_code();
@@ -4262,7 +3968,7 @@ fs_visitor::run_cs()
emit_cs_terminate();
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ if (shader_time_index >= 0)
emit_shader_time_end();
calculate_cfg();
@@ -4312,10 +4018,18 @@ brw_wm_fs_emit(struct brw_context *brw,
if (unlikely(INTEL_DEBUG & DEBUG_WM))
brw_dump_ir("fragment", prog, &shader->base, &fp->Base);
+ int st_index8 = -1, st_index16 = -1;
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+ st_index8 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS8);
+ st_index16 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS16);
+ }
+
/* Now the main event: Visit the shader IR and generate our FS IR for it.
*/
- fs_visitor v(brw, mem_ctx, key, prog_data, prog, fp, 8);
- if (!v.run_fs()) {
+ fs_visitor v(brw->intelScreen->compiler, brw,
+ mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
+ prog, &fp->Base, 8, st_index8);
+ if (!v.run_fs(false /* do_rep_send */)) {
if (prog) {
prog->LinkStatus = false;
ralloc_strcat(&prog->InfoLog, v.fail_msg);
@@ -4328,20 +4042,18 @@ brw_wm_fs_emit(struct brw_context *brw,
}
cfg_t *simd16_cfg = NULL;
- fs_visitor v2(brw, mem_ctx, key, prog_data, prog, fp, 16);
+ fs_visitor v2(brw->intelScreen->compiler, brw,
+ mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
+ prog, &fp->Base, 16, st_index16);
if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
if (!v.simd16_unsupported) {
/* Try a SIMD16 compile */
v2.import_uniforms(&v);
- if (!v2.run_fs()) {
- perf_debug("SIMD16 shader failed to compile, falling back to "
- "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg);
+ if (!v2.run_fs(brw->use_rep_send)) {
+ perf_debug("SIMD16 shader failed to compile: %s", v2.fail_msg);
} else {
simd16_cfg = v2.cfg;
}
- } else {
- perf_debug("SIMD16 shader unsupported, falling back to "
- "SIMD8 at a 10-20%% performance cost: %s", v.no16_msg);
}
}
@@ -4355,7 +4067,8 @@ brw_wm_fs_emit(struct brw_context *brw,
prog_data->no_8 = false;
}
- fs_generator g(brw, mem_ctx, (void *) key, &prog_data->base,
+ fs_generator g(brw->intelScreen->compiler, brw,
+ mem_ctx, (void *) key, &prog_data->base,
&fp->Base, v.promoted_constants, v.runtime_check_aads_emit, "FS");
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 1d7de2effbd..243baf688de 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -29,6 +29,7 @@
#include "brw_shader.h"
#include "brw_ir_fs.h"
+#include "brw_fs_builder.h"
extern "C" {
@@ -66,138 +67,44 @@ namespace brw {
*
* Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
*/
-class fs_visitor : public backend_visitor
+class fs_visitor : public backend_shader
{
public:
- const fs_reg reg_null_f;
- const fs_reg reg_null_d;
- const fs_reg reg_null_ud;
-
- fs_visitor(struct brw_context *brw,
+ fs_visitor(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
- const struct brw_wm_prog_key *key,
- struct brw_wm_prog_data *prog_data,
+ gl_shader_stage stage,
+ const void *key,
+ struct brw_stage_prog_data *prog_data,
struct gl_shader_program *shader_prog,
- struct gl_fragment_program *fp,
- unsigned dispatch_width);
-
- fs_visitor(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_vs_prog_key *key,
- struct brw_vs_prog_data *prog_data,
- struct gl_shader_program *shader_prog,
- struct gl_vertex_program *cp,
- unsigned dispatch_width);
-
- fs_visitor(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_cs_prog_key *key,
- struct brw_cs_prog_data *prog_data,
- struct gl_shader_program *shader_prog,
- struct gl_compute_program *cp,
- unsigned dispatch_width);
+ struct gl_program *prog,
+ unsigned dispatch_width,
+ int shader_time_index);
~fs_visitor();
- void init();
- fs_reg *variable_storage(ir_variable *var);
fs_reg vgrf(const glsl_type *const type);
- fs_reg vgrf(int num_components);
void import_uniforms(fs_visitor *v);
- void setup_uniform_clipplane_values();
- void compute_clip_distance();
-
- void visit(ir_variable *ir);
- void visit(ir_assignment *ir);
- void visit(ir_dereference_variable *ir);
- void visit(ir_dereference_record *ir);
- void visit(ir_dereference_array *ir);
- void visit(ir_expression *ir);
- void visit(ir_texture *ir);
- void visit(ir_if *ir);
- void visit(ir_constant *ir);
- void visit(ir_swizzle *ir);
- void visit(ir_return *ir);
- void visit(ir_loop *ir);
- void visit(ir_loop_jump *ir);
- void visit(ir_discard *ir);
- void visit(ir_call *ir);
- void visit(ir_function *ir);
- void visit(ir_function_signature *ir);
- void visit(ir_emit_vertex *);
- void visit(ir_end_primitive *);
+ void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
+ void compute_clip_distance(gl_clip_plane *clip_planes);
uint32_t gather_channel(int orig_chan, uint32_t sampler);
void swizzle_result(ir_texture_opcode op, int dest_components,
fs_reg orig_val, uint32_t sampler);
- fs_inst *emit(fs_inst *inst);
- void emit(exec_list list);
-
- fs_inst *emit(enum opcode opcode);
- fs_inst *emit(enum opcode opcode, const fs_reg &dst);
- fs_inst *emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0);
- fs_inst *emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1);
- fs_inst *emit(enum opcode opcode, const fs_reg &dst,
- const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
- fs_inst *emit(enum opcode opcode, const fs_reg &dst,
- fs_reg src[], int sources);
-
- fs_inst *MOV(const fs_reg &dst, const fs_reg &src);
- fs_inst *NOT(const fs_reg &dst, const fs_reg &src);
- fs_inst *RNDD(const fs_reg &dst, const fs_reg &src);
- fs_inst *RNDE(const fs_reg &dst, const fs_reg &src);
- fs_inst *RNDZ(const fs_reg &dst, const fs_reg &src);
- fs_inst *FRC(const fs_reg &dst, const fs_reg &src);
- fs_inst *ADD(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *MUL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *MACH(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *MAC(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *SHL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *SHR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *ASR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *AND(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *OR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *XOR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *IF(enum brw_predicate predicate);
- fs_inst *IF(const fs_reg &src0, const fs_reg &src1,
- enum brw_conditional_mod condition);
- fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1,
- enum brw_conditional_mod condition);
- fs_inst *LRP(const fs_reg &dst, const fs_reg &a, const fs_reg &y,
- const fs_reg &x);
- fs_inst *DEP_RESOLVE_MOV(int grf);
- fs_inst *BFREV(const fs_reg &dst, const fs_reg &value);
- fs_inst *BFE(const fs_reg &dst, const fs_reg &bits, const fs_reg &offset,
- const fs_reg &value);
- fs_inst *BFI1(const fs_reg &dst, const fs_reg &bits, const fs_reg &offset);
- fs_inst *BFI2(const fs_reg &dst, const fs_reg &bfi1_dst,
- const fs_reg &insert, const fs_reg &base);
- fs_inst *FBH(const fs_reg &dst, const fs_reg &value);
- fs_inst *FBL(const fs_reg &dst, const fs_reg &value);
- fs_inst *CBIT(const fs_reg &dst, const fs_reg &value);
- fs_inst *MAD(const fs_reg &dst, const fs_reg &c, const fs_reg &b,
- const fs_reg &a);
- fs_inst *ADDC(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *SUBB(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
- fs_inst *SEL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-
int type_size(const struct glsl_type *type);
fs_inst *get_instruction_generating_reg(fs_inst *start,
fs_inst *end,
const fs_reg &reg);
- fs_inst *LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources,
- int header_size);
-
- exec_list VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst,
- const fs_reg &surf_index,
- const fs_reg &varying_offset,
- uint32_t const_offset);
+ void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
+ const fs_reg &dst,
+ const fs_reg &surf_index,
+ const fs_reg &varying_offset,
+ uint32_t const_offset);
+ void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
- bool run_fs();
- bool run_vs();
+ bool run_fs(bool do_rep_send);
+ bool run_vs(gl_clip_plane *clip_planes);
bool run_cs();
void optimize();
void allocate_registers();
@@ -213,11 +120,8 @@ public:
void assign_vs_urb_setup();
bool assign_regs(bool allow_spilling);
void assign_regs_trivial();
- void get_used_mrfs(bool *mrf_used);
void setup_payload_interference(struct ra_graph *g, int payload_reg_count,
int first_payload_node);
- void setup_mrf_hack_interference(struct ra_graph *g,
- int first_mrf_hack_node);
int choose_spill_reg(struct ra_graph *g);
void spill_reg(int spill_reg);
void split_virtual_grfs();
@@ -254,9 +158,10 @@ public:
fs_inst *inst);
void vfail(const char *msg, va_list args);
void fail(const char *msg, ...);
- void no16(const char *msg, ...);
+ void no16(const char *msg);
void lower_uniform_pull_constant_loads();
bool lower_load_payload();
+ bool lower_integer_multiplication();
bool opt_combine_constants();
void emit_dummy_fs();
@@ -318,58 +223,18 @@ public:
fs_reg emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler);
void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
void resolve_source_modifiers(fs_reg *src);
- fs_reg fix_math_operand(fs_reg src);
- fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
- fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
- fs_inst *emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y,
- const fs_reg &a);
- void emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst,
- const fs_reg &src0, const fs_reg &src1);
void emit_discard_jump();
- /** Copy any live channel from \p src to the first channel of \p dst. */
- void emit_uniformize(const fs_reg &dst, const fs_reg &src);
- bool try_emit_b2f_of_comparison(ir_expression *ir);
- bool try_emit_saturate(ir_expression *ir);
- bool try_emit_line(ir_expression *ir);
- bool try_emit_mad(ir_expression *ir);
bool try_replace_with_sel();
- bool try_opt_frontfacing_ternary(ir_if *ir);
bool opt_peephole_sel();
bool opt_peephole_predicated_break();
bool opt_saturate_propagation();
bool opt_cmod_propagation();
bool opt_zero_samples();
- void emit_bool_to_cond_code(ir_rvalue *condition);
- void emit_bool_to_cond_code_of_reg(ir_expression *expr, fs_reg op[3]);
- void emit_if_gen6(ir_if *ir);
void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg,
uint32_t spill_offset, int count);
void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg,
uint32_t spill_offset, int count);
- void emit_fragment_program_code();
- void setup_fp_regs();
- fs_reg get_fp_src_reg(const prog_src_register *src);
- fs_reg get_fp_dst_reg(const prog_dst_register *dst);
- void emit_fp_alu1(enum opcode opcode,
- const struct prog_instruction *fpi,
- fs_reg dst, fs_reg src);
- void emit_fp_alu2(enum opcode opcode,
- const struct prog_instruction *fpi,
- fs_reg dst, fs_reg src0, fs_reg src1);
- void emit_fp_scalar_write(const struct prog_instruction *fpi,
- fs_reg dst, fs_reg src);
- void emit_fp_scalar_math(enum opcode opcode,
- const struct prog_instruction *fpi,
- fs_reg dst, fs_reg src);
-
- void emit_fp_minmax(const struct prog_instruction *fpi,
- fs_reg dst, fs_reg src0, fs_reg src1);
-
- void emit_fp_sop(enum brw_conditional_mod conditional_mod,
- const struct prog_instruction *fpi,
- fs_reg dst, fs_reg src0, fs_reg src1, fs_reg one);
-
void emit_nir_code();
void nir_setup_inputs(nir_shader *shader);
void nir_setup_outputs(nir_shader *shader);
@@ -383,13 +248,17 @@ public:
void nir_emit_loop(nir_loop *loop);
void nir_emit_block(nir_block *block);
void nir_emit_instr(nir_instr *instr);
- void nir_emit_alu(nir_alu_instr *instr);
- void nir_emit_intrinsic(nir_intrinsic_instr *instr);
- void nir_emit_texture(nir_tex_instr *instr);
- void nir_emit_jump(nir_jump_instr *instr);
+ void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr);
+ void nir_emit_intrinsic(const brw::fs_builder &bld,
+ nir_intrinsic_instr *instr);
+ void nir_emit_texture(const brw::fs_builder &bld,
+ nir_tex_instr *instr);
+ void nir_emit_jump(const brw::fs_builder &bld,
+ nir_jump_instr *instr);
fs_reg get_nir_src(nir_src src);
fs_reg get_nir_dest(nir_dest dest);
- void emit_percomp(fs_inst *inst, unsigned wr_mask);
+ void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
+ unsigned wr_mask);
bool optimize_frontfacing_ternary(nir_alu_instr *instr,
const fs_reg &result);
@@ -397,16 +266,21 @@ public:
void setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
unsigned exec_size, bool use_2nd_half);
void emit_alpha_test();
- fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2,
+ fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
+ fs_reg color1, fs_reg color2,
fs_reg src0_alpha, unsigned components,
unsigned exec_size, bool use_2nd_half = false);
void emit_fb_writes();
- void emit_urb_writes();
+ void emit_urb_writes(gl_clip_plane *clip_planes);
void emit_cs_terminate();
+ void emit_barrier();
+
void emit_shader_time_begin();
void emit_shader_time_end();
- fs_inst *SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg value);
+ void SHADER_TIME_ADD(const brw::fs_builder &bld,
+ int shader_time_subindex,
+ fs_reg value);
void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
fs_reg dst, fs_reg offset, fs_reg src0,
@@ -415,23 +289,9 @@ public:
void emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
fs_reg offset);
- void emit_interpolate_expression(ir_expression *ir);
-
- bool try_rewrite_rhs_to_dst(ir_assignment *ir,
- fs_reg dst,
- fs_reg src,
- fs_inst *pre_rhs_inst,
- fs_inst *last_rhs_inst);
- void emit_assignment_writes(fs_reg &l, fs_reg &r,
- const glsl_type *type, bool predicated);
- void resolve_ud_negate(fs_reg *reg);
- void resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg);
-
- fs_reg get_timestamp(fs_inst **out_mov);
+ fs_reg get_timestamp(const brw::fs_builder &bld);
struct brw_reg interp_reg(int location, int channel);
- void setup_uniform_values(ir_variable *ir);
- void setup_builtin_uniform_values(ir_variable *ir);
int implied_mrf_writes(fs_inst *inst);
virtual void dump_instructions();
@@ -439,8 +299,6 @@ public:
void dump_instruction(backend_instruction *inst);
void dump_instruction(backend_instruction *inst, FILE *file);
- void visit_atomic_counter_intrinsic(ir_call *ir);
-
const void *const key;
const struct brw_sampler_prog_key_data *key_tex;
@@ -476,7 +334,6 @@ public:
*/
int *push_constant_loc;
- struct hash_table *variable_ht;
fs_reg frag_depth;
fs_reg sample_mask;
fs_reg outputs[VARYING_SLOT_MAX];
@@ -487,26 +344,18 @@ public:
/** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
unsigned max_grf;
- fs_reg *fp_temp_regs;
- fs_reg *fp_input_regs;
-
fs_reg *nir_locals;
fs_reg *nir_globals;
fs_reg nir_inputs;
fs_reg nir_outputs;
fs_reg *nir_system_values;
- /** @{ debug annotation info */
- const char *current_annotation;
- const void *base_ir;
- /** @} */
-
bool failed;
char *fail_msg;
bool simd16_unsupported;
char *no16_msg;
- /* Result of last visit() method. */
+ /* Result of last visit() method. Still used by emit_texture() */
fs_reg result;
/** Register numbers for thread payload fields. */
@@ -539,7 +388,10 @@ public:
const unsigned dispatch_width; /**< 8 or 16 */
+ int shader_time_index;
+
unsigned promoted_constants;
+ brw::fs_builder bld;
};
/**
@@ -550,7 +402,7 @@ public:
class fs_generator
{
public:
- fs_generator(struct brw_context *brw,
+ fs_generator(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
const void *key,
struct brw_stage_prog_data *prog_data,
@@ -572,6 +424,7 @@ private:
void generate_fb_write(fs_inst *inst, struct brw_reg payload);
void generate_urb_write(fs_inst *inst, struct brw_reg payload);
void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
+ void generate_barrier(fs_inst *inst, struct brw_reg src);
void generate_blorp_fb_write(fs_inst *inst);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
struct brw_reg *src);
@@ -644,7 +497,9 @@ private:
bool patch_discard_jumps_to_fb_writes();
- struct brw_context *brw;
+ const struct brw_compiler *compiler;
+ void *log_data; /* Passed to compiler->*_log functions */
+
const struct brw_device_info *devinfo;
struct brw_codegen *p;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
new file mode 100644
index 00000000000..58ac5980da5
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -0,0 +1,652 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2010-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_FS_BUILDER_H
+#define BRW_FS_BUILDER_H
+
+#include "brw_ir_fs.h"
+#include "brw_shader.h"
+#include "brw_context.h"
+
+namespace brw {
+ /**
+ * Toolbox to assemble an FS IR program out of individual instructions.
+ *
+ * This object is meant to have an interface consistent with
+ * brw::vec4_builder. They cannot be fully interchangeable because
+ * brw::fs_builder generates scalar code while brw::vec4_builder generates
+ * vector code.
+ */
+ class fs_builder {
+ public:
+ /** Type used in this IR to represent a source of an instruction. */
+ typedef fs_reg src_reg;
+
+ /** Type used in this IR to represent the destination of an instruction. */
+ typedef fs_reg dst_reg;
+
+ /** Type used in this IR to represent an instruction. */
+ typedef fs_inst instruction;
+
+ /**
+ * Construct an fs_builder that inserts instructions into \p shader.
+ * \p dispatch_width gives the native execution width of the program.
+ */
+ fs_builder(backend_shader *shader,
+ unsigned dispatch_width) :
+ shader(shader), block(NULL), cursor(NULL),
+ _dispatch_width(dispatch_width),
+ _group(0),
+ force_writemask_all(false),
+ annotation()
+ {
+ }
+
+ /**
+ * Construct an fs_builder that inserts instructions before \p cursor in
+ * basic block \p block, inheriting other code generation parameters
+ * from this.
+ */
+ fs_builder
+ at(bblock_t *block, exec_node *cursor) const
+ {
+ fs_builder bld = *this;
+ bld.block = block;
+ bld.cursor = cursor;
+ return bld;
+ }
+
+ /**
+ * Construct an fs_builder appending instructions at the end of the
+ * instruction list of the shader, inheriting other code generation
+ * parameters from this.
+ */
+ fs_builder
+ at_end() const
+ {
+ return at(NULL, (exec_node *)&shader->instructions.tail);
+ }
+
+ /**
+ * Construct a builder specifying the default SIMD width and group of
+ * channel enable signals, inheriting other code generation parameters
+ * from this.
+ *
+ * \p n gives the default SIMD width, \p i gives the slot group used for
+ * predication and control flow masking in multiples of \p n channels.
+ */
+ fs_builder
+ group(unsigned n, unsigned i) const
+ {
+ assert(n <= dispatch_width() &&
+ i < dispatch_width() / n);
+ fs_builder bld = *this;
+ bld._dispatch_width = n;
+ bld._group += i * n;
+ return bld;
+ }
+
+ /**
+ * Alias for group() with width equal to eight.
+ */
+ fs_builder
+ half(unsigned i) const
+ {
+ return group(8, i);
+ }
+
+ /**
+ * Construct a builder with per-channel control flow execution masking
+ * disabled if \p b is true. If control flow execution masking is
+ * already disabled this has no effect.
+ */
+ fs_builder
+ exec_all(bool b = true) const
+ {
+ fs_builder bld = *this;
+ if (b)
+ bld.force_writemask_all = true;
+ return bld;
+ }
+
+ /**
+ * Construct a builder with the given debug annotation info.
+ */
+ fs_builder
+ annotate(const char *str, const void *ir = NULL) const
+ {
+ fs_builder bld = *this;
+ bld.annotation.str = str;
+ bld.annotation.ir = ir;
+ return bld;
+ }
+
+ /**
+ * Get the SIMD width in use.
+ */
+ unsigned
+ dispatch_width() const
+ {
+ return _dispatch_width;
+ }
+
+ /**
+ * Allocate a virtual register of natural vector size (one for this IR)
+ * and SIMD width. \p n gives the amount of space to allocate in
+ * dispatch_width units (which is just enough space for one logical
+ * component in this IR).
+ */
+ dst_reg
+ vgrf(enum brw_reg_type type, unsigned n = 1) const
+ {
+ return dst_reg(GRF, shader->alloc.allocate(
+ DIV_ROUND_UP(n * type_sz(type) * dispatch_width(),
+ REG_SIZE)),
+ type, dispatch_width());
+ }
+
+ /**
+ * Create a null register of floating type.
+ */
+ dst_reg
+ null_reg_f() const
+ {
+ return dst_reg(retype(brw_null_vec(dispatch_width()),
+ BRW_REGISTER_TYPE_F));
+ }
+
+ /**
+ * Create a null register of signed integer type.
+ */
+ dst_reg
+ null_reg_d() const
+ {
+ return dst_reg(retype(brw_null_vec(dispatch_width()),
+ BRW_REGISTER_TYPE_D));
+ }
+
+ /**
+ * Create a null register of unsigned integer type.
+ */
+ dst_reg
+ null_reg_ud() const
+ {
+ return dst_reg(retype(brw_null_vec(dispatch_width()),
+ BRW_REGISTER_TYPE_UD));
+ }
+
+ /**
+ * Get the mask of SIMD channels enabled by dispatch and not yet
+ * disabled by discard.
+ */
+ src_reg
+ sample_mask_reg() const
+ {
+ const bool uses_kill =
+ (shader->stage == MESA_SHADER_FRAGMENT &&
+ ((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill);
+ return (shader->stage != MESA_SHADER_FRAGMENT ? src_reg(0xffff) :
+ uses_kill ? brw_flag_reg(0, 1) :
+ retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD));
+ }
+
+ /**
+ * Insert an instruction into the program.
+ */
+ instruction *
+ emit(const instruction &inst) const
+ {
+ return emit(new(shader->mem_ctx) instruction(inst));
+ }
+
+ /**
+ * Create and insert a nullary control instruction into the program.
+ */
+ instruction *
+ emit(enum opcode opcode) const
+ {
+ return emit(instruction(opcode, dispatch_width()));
+ }
+
+ /**
+ * Create and insert a nullary instruction into the program.
+ */
+ instruction *
+ emit(enum opcode opcode, const dst_reg &dst) const
+ {
+ return emit(instruction(opcode, dst));
+ }
+
+ /**
+ * Create and insert a unary instruction into the program.
+ */
+ instruction *
+ emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
+ {
+ switch (opcode) {
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
+ return fix_math_instruction(
+ emit(instruction(opcode, dst.width, dst,
+ fix_math_operand(src0))));
+
+ default:
+ return emit(instruction(opcode, dst.width, dst, src0));
+ }
+ }
+
+ /**
+ * Create and insert a binary instruction into the program.
+ */
+ instruction *
+ emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1) const
+ {
+ switch (opcode) {
+ case SHADER_OPCODE_POW:
+ case SHADER_OPCODE_INT_QUOTIENT:
+ case SHADER_OPCODE_INT_REMAINDER:
+ return fix_math_instruction(
+ emit(instruction(opcode, dst.width, dst,
+ fix_math_operand(src0),
+ fix_math_operand(src1))));
+
+ default:
+ return emit(instruction(opcode, dst.width, dst, src0, src1));
+
+ }
+ }
+
+ /**
+ * Create and insert a ternary instruction into the program.
+ */
+ instruction *
+ emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1, const src_reg &src2) const
+ {
+ switch (opcode) {
+ case BRW_OPCODE_BFE:
+ case BRW_OPCODE_BFI2:
+ case BRW_OPCODE_MAD:
+ case BRW_OPCODE_LRP:
+ return emit(instruction(opcode, dst.width, dst,
+ fix_3src_operand(src0),
+ fix_3src_operand(src1),
+ fix_3src_operand(src2)));
+
+ default:
+ return emit(instruction(opcode, dst.width, dst, src0, src1, src2));
+ }
+ }
+
+ /**
+ * Insert a preallocated instruction into the program.
+ */
+ instruction *
+ emit(instruction *inst) const
+ {
+ assert(inst->exec_size == dispatch_width() ||
+ force_writemask_all);
+ assert(_group == 0 || _group == 8);
+
+ inst->force_sechalf = (_group == 8);
+ inst->force_writemask_all = force_writemask_all;
+ inst->annotation = annotation.str;
+ inst->ir = annotation.ir;
+
+ if (block)
+ static_cast<instruction *>(cursor)->insert_before(block, inst);
+ else
+ cursor->insert_before(inst);
+
+ return inst;
+ }
+
+ /**
+ * Select \p src0 if the comparison of both sources with the given
+ * conditional mod evaluates to true, otherwise select \p src1.
+ *
+ * Generally useful to get the minimum or maximum of two values.
+ */
+ void
+ emit_minmax(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1, brw_conditional_mod mod) const
+ {
+ if (shader->devinfo->gen >= 6) {
+ set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
+ fix_unsigned_negate(src1)));
+ } else {
+ CMP(null_reg_d(), src0, src1, mod);
+ set_predicate(BRW_PREDICATE_NORMAL,
+ SEL(dst, src0, src1));
+ }
+ }
+
+ /**
+ * Copy any live channel from \p src to the first channel of \p dst.
+ */
+ void
+ emit_uniformize(const dst_reg &dst, const src_reg &src) const
+ {
+ const fs_builder ubld = exec_all();
+ const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
+
+ ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, component(chan_index, 0));
+ ubld.emit(SHADER_OPCODE_BROADCAST, component(dst, 0),
+ src, component(chan_index, 0));
+ }
+
+ /**
+ * Assorted arithmetic ops.
+ * @{
+ */
+#define ALU1(op) \
+ instruction * \
+ op(const dst_reg &dst, const src_reg &src0) const \
+ { \
+ return emit(BRW_OPCODE_##op, dst, src0); \
+ }
+
+#define ALU2(op) \
+ instruction * \
+ op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
+ { \
+ return emit(BRW_OPCODE_##op, dst, src0, src1); \
+ }
+
+#define ALU2_ACC(op) \
+ instruction * \
+ op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
+ { \
+ instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1); \
+ inst->writes_accumulator = true; \
+ return inst; \
+ }
+
+#define ALU3(op) \
+ instruction * \
+ op(const dst_reg &dst, const src_reg &src0, const src_reg &src1, \
+ const src_reg &src2) const \
+ { \
+ return emit(BRW_OPCODE_##op, dst, src0, src1, src2); \
+ }
+
+ ALU2(ADD)
+ ALU2_ACC(ADDC)
+ ALU2(AND)
+ ALU2(ASR)
+ ALU2(AVG)
+ ALU3(BFE)
+ ALU2(BFI1)
+ ALU3(BFI2)
+ ALU1(BFREV)
+ ALU1(CBIT)
+ ALU2(CMPN)
+ ALU3(CSEL)
+ ALU2(DP2)
+ ALU2(DP3)
+ ALU2(DP4)
+ ALU2(DPH)
+ ALU1(F16TO32)
+ ALU1(F32TO16)
+ ALU1(FBH)
+ ALU1(FBL)
+ ALU1(FRC)
+ ALU2(LINE)
+ ALU1(LZD)
+ ALU2(MAC)
+ ALU2_ACC(MACH)
+ ALU3(MAD)
+ ALU1(MOV)
+ ALU2(MUL)
+ ALU1(NOT)
+ ALU2(OR)
+ ALU2(PLN)
+ ALU1(RNDD)
+ ALU1(RNDE)
+ ALU1(RNDU)
+ ALU1(RNDZ)
+ ALU2(SAD2)
+ ALU2_ACC(SADA2)
+ ALU2(SEL)
+ ALU2(SHL)
+ ALU2(SHR)
+ ALU2_ACC(SUBB)
+ ALU2(XOR)
+
+#undef ALU3
+#undef ALU2_ACC
+#undef ALU2
+#undef ALU1
+ /** @} */
+
+ /**
+ * CMP: Sets the low bit of the destination channels with the result
+ * of the comparison, while the upper bits are undefined, and updates
+ * the flag register with the packed 16 bits of the result.
+ */
+ instruction *
+ CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
+ brw_conditional_mod condition) const
+ {
+ /* Take the instruction:
+ *
+ * CMP null<d> src0<f> src1<f>
+ *
+ * Original gen4 does type conversion to the destination type
+ * before comparison, producing garbage results for floating
+ * point comparisons.
+ *
+ * The destination type doesn't matter on newer generations,
+ * so we set the type to match src0 so we can compact the
+ * instruction.
+ */
+ return set_condmod(condition,
+ emit(BRW_OPCODE_CMP, retype(dst, src0.type),
+ fix_unsigned_negate(src0),
+ fix_unsigned_negate(src1)));
+ }
+
+ /**
+ * Gen4 predicated IF.
+ */
+ instruction *
+ IF(brw_predicate predicate) const
+ {
+ return set_predicate(predicate, emit(BRW_OPCODE_IF));
+ }
+
+ /**
+ * Emit a linear interpolation instruction.
+ */
+ instruction *
+ LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
+ const src_reg &a) const
+ {
+ if (shader->devinfo->gen >= 6) {
+ /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
+ * we need to reorder the operands.
+ */
+ return emit(BRW_OPCODE_LRP, dst, a, y, x);
+
+ } else {
+ /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */
+ const dst_reg y_times_a = vgrf(dst.type);
+ const dst_reg one_minus_a = vgrf(dst.type);
+ const dst_reg x_times_one_minus_a = vgrf(dst.type);
+
+ MUL(y_times_a, y, a);
+ ADD(one_minus_a, negate(a), src_reg(1.0f));
+ MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
+ return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
+ }
+ }
+
+ /**
+ * Collect a number of registers in a contiguous range of registers.
+ */
+ instruction *
+ LOAD_PAYLOAD(const dst_reg &dst, const src_reg *src,
+ unsigned sources, unsigned header_size) const
+ {
+ assert(dst.width % 8 == 0);
+ instruction *inst = emit(instruction(SHADER_OPCODE_LOAD_PAYLOAD,
+ dst.width, dst, src, sources));
+ inst->header_size = header_size;
+
+ for (unsigned i = 0; i < header_size; i++)
+ assert(src[i].file != GRF ||
+ src[i].width * type_sz(src[i].type) == 32);
+ inst->regs_written = header_size;
+
+ for (unsigned i = header_size; i < sources; ++i)
+ assert(src[i].file != GRF ||
+ src[i].width == dst.width);
+ inst->regs_written += (sources - header_size) * (dst.width / 8);
+
+ return inst;
+ }
+
+ backend_shader *shader;
+
+ private:
+ /**
+ * Workaround for negation of UD registers. See comment in
+ * fs_generator::generate_code() for more details.
+ */
+ src_reg
+ fix_unsigned_negate(const src_reg &src) const
+ {
+ if (src.type == BRW_REGISTER_TYPE_UD &&
+ src.negate) {
+ dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
+ MOV(temp, src);
+ return src_reg(temp);
+ } else {
+ return src;
+ }
+ }
+
+ /**
+ * Workaround for source register modes not supported by the ternary
+ * instruction encoding.
+ */
+ src_reg
+ fix_3src_operand(const src_reg &src) const
+ {
+ if (src.file == GRF || src.file == UNIFORM || src.stride > 1) {
+ return src;
+ } else {
+ dst_reg expanded = vgrf(src.type);
+ MOV(expanded, src);
+ return expanded;
+ }
+ }
+
+ /**
+ * Workaround for source register modes not supported by the math
+ * instruction.
+ */
+ src_reg
+ fix_math_operand(const src_reg &src) const
+ {
+ /* Can't do hstride == 0 args on gen6 math, so expand it out. We
+ * might be able to do better by doing execsize = 1 math and then
+ * expanding that result out, but we would need to be careful with
+ * masking.
+ *
+ * Gen6 hardware ignores source modifiers (negate and abs) on math
+ * instructions, so we also move to a temp to set those up.
+ *
+ * Gen7 relaxes most of the above restrictions, but still can't use IMM
+ * operands to math
+ */
+ if ((shader->devinfo->gen == 6 &&
+ (src.file == IMM || src.file == UNIFORM ||
+ src.abs || src.negate)) ||
+ (shader->devinfo->gen == 7 && src.file == IMM)) {
+ const dst_reg tmp = vgrf(src.type);
+ MOV(tmp, src);
+ return tmp;
+ } else {
+ return src;
+ }
+ }
+
+ /**
+ * Workaround other weirdness of the math instruction.
+ */
+ instruction *
+ fix_math_instruction(instruction *inst) const
+ {
+ if (shader->devinfo->gen < 6) {
+ inst->base_mrf = 2;
+ inst->mlen = inst->sources * dispatch_width() / 8;
+
+ if (inst->sources > 1) {
+ /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
+ * "Message Payload":
+ *
+ * "Operand0[7]. For the INT DIV functions, this operand is the
+ * denominator."
+ * ...
+ * "Operand1[7]. For the INT DIV functions, this operand is the
+ * numerator."
+ */
+ const bool is_int_div = inst->opcode != SHADER_OPCODE_POW;
+ const fs_reg src0 = is_int_div ? inst->src[1] : inst->src[0];
+ const fs_reg src1 = is_int_div ? inst->src[0] : inst->src[1];
+
+ inst->resize_sources(1);
+ inst->src[0] = src0;
+
+ at(block, inst).MOV(fs_reg(MRF, inst->base_mrf + 1, src1.type,
+ dispatch_width()), src1);
+ }
+ }
+
+ return inst;
+ }
+
+ bblock_t *block;
+ exec_node *cursor;
+
+ unsigned _dispatch_width;
+ unsigned _group;
+ bool force_writemask_all;
+
+ /** Debug annotation info. */
+ struct {
+ const char *str;
+ const void *ir;
+ } annotation;
+ };
+}
+
+#endif
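
A brief usage sketch may help orient readers of the passes converted below; it is illustrative only, not part of the patch. Here "v" stands for an fs_visitor (whose new "bld" member the later hunks use to append instructions), and "block", "inst", "dst", "src0" and "src1" are placeholders for whatever the calling pass already has in hand:

    const brw::fs_builder &bld = v->bld;

    /* Allocate a full-width temporary and initialize it in all channels,
     * ignoring control-flow masking. */
    const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
    bld.exec_all().MOV(tmp, fs_reg(1.0f));

    /* dst = max(src0, src1), lowered to SEL or CMP+SEL depending on gen. */
    bld.emit_minmax(dst, src0, src1, BRW_CONDITIONAL_GE);

    /* Restrict emission to the second SIMD8 half (SIMD16 programs only). */
    bld.half(1).ADD(dst, src0, tmp);

    /* Derive a builder that inserts before an existing instruction. */
    const brw::fs_builder ibld = bld.at(block, inst).annotate("example");
    ibld.MOV(dst, tmp);
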
diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
index aa62031df73..0af5a915c9f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
@@ -38,6 +38,8 @@
#include "brw_fs_live_variables.h"
#include "brw_cfg.h"
+using namespace brw;
+
/* Returns whether an instruction could co-issue if its immediate source were
* replaced with a GRF source.
*/
@@ -270,15 +272,14 @@ fs_visitor::opt_combine_constants()
reg.stride = 0;
for (int i = 0; i < table.len; i++) {
struct imm *imm = &table.imm[i];
-
- fs_inst *mov = MOV(reg, fs_reg(imm->val));
- mov->force_writemask_all = true;
- if (imm->inst) {
- imm->inst->insert_before(imm->block, mov);
- } else {
- backend_instruction *inst = imm->block->last_non_control_flow_inst();
- inst->insert_after(imm->block, mov);
- }
+ /* Insert it either before the instruction that generated the immediate
+ * or after the last non-control flow instruction of the common ancestor.
+ */
+ exec_node *n = (imm->inst ? imm->inst :
+ imm->block->last_non_control_flow_inst()->next);
+ const fs_builder ibld = bld.at(imm->block, n).exec_all();
+
+ ibld.MOV(reg, fs_reg(imm->val));
imm->reg = reg.reg;
imm->subreg_offset = reg.subreg_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 52bfa921dc3..c92aae4b1d6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -541,8 +541,16 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
/* Fit this constant in by commuting the operands.
* Exception: we can't do this for 32-bit integer MUL/MACH
* because it's asymmetric.
+ *
+ * The BSpec says for Broadwell that
+ *
+ * "When multiplying DW x DW, the dst cannot be accumulator."
+ *
+ * Integer MUL with a non-accumulator destination will be lowered
+ * by lower_integer_multiplication(), so don't restrict it.
*/
- if ((inst->opcode == BRW_OPCODE_MUL ||
+ if (((inst->opcode == BRW_OPCODE_MUL &&
+ inst->dst.is_accumulator()) ||
inst->opcode == BRW_OPCODE_MACH) &&
(inst->src[1].type == BRW_REGISTER_TYPE_D ||
inst->src[1].type == BRW_REGISTER_TYPE_UD))
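
The practical effect: an immediate can now be commuted into src0 of an ordinary DW x DW integer MUL, since lower_integer_multiplication() legalizes it later, and only accumulator-destination MULs and MACH keep the old restriction. A compact restatement of the new predicate, as a sketch rather than additional patch content:

    /* True when commuting the constant must still be refused. */
    const bool dword_src = inst->src[1].type == BRW_REGISTER_TYPE_D ||
                           inst->src[1].type == BRW_REGISTER_TYPE_UD;
    const bool refuse = dword_src &&
       ((inst->opcode == BRW_OPCODE_MUL && inst->dst.is_accumulator()) ||
        inst->opcode == BRW_OPCODE_MACH);
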
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index db01f8cf7ab..70f0217b93d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -32,6 +32,8 @@
* 13.1 (p378).
*/
+using namespace brw;
+
namespace {
struct aeb_entry : public exec_node {
/** The instruction that generates the expression value. */
@@ -152,28 +154,34 @@ static bool
instructions_match(fs_inst *a, fs_inst *b, bool *negate)
{
return a->opcode == b->opcode &&
+ a->force_writemask_all == b->force_writemask_all &&
+ a->exec_size == b->exec_size &&
+ a->force_sechalf == b->force_sechalf &&
a->saturate == b->saturate &&
a->predicate == b->predicate &&
a->predicate_inverse == b->predicate_inverse &&
a->conditional_mod == b->conditional_mod &&
+ a->flag_subreg == b->flag_subreg &&
a->dst.type == b->dst.type &&
+ a->offset == b->offset &&
+ a->mlen == b->mlen &&
+ a->regs_written == b->regs_written &&
+ a->base_mrf == b->base_mrf &&
+ a->eot == b->eot &&
+ a->header_size == b->header_size &&
+ a->shadow_compare == b->shadow_compare &&
+ a->pi_noperspective == b->pi_noperspective &&
a->sources == b->sources &&
- (a->is_tex() ? (a->offset == b->offset &&
- a->mlen == b->mlen &&
- a->regs_written == b->regs_written &&
- a->base_mrf == b->base_mrf &&
- a->eot == b->eot &&
- a->header_size == b->header_size &&
- a->shadow_compare == b->shadow_compare)
- : true) &&
operands_match(a, b, negate);
}
-static fs_inst *
-create_copy_instr(fs_visitor *v, fs_inst *inst, fs_reg src, bool negate)
+static void
+create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
{
int written = inst->regs_written;
int dst_width = inst->dst.width / 8;
+ const fs_builder ubld = bld.group(inst->exec_size, inst->force_sechalf)
+ .exec_all(inst->force_writemask_all);
fs_inst *copy;
if (written > dst_width) {
@@ -189,7 +197,7 @@ create_copy_instr(fs_visitor *v, fs_inst *inst, fs_reg src, bool negate)
}
assert(src.file == GRF);
- payload = ralloc_array(v->mem_ctx, fs_reg, sources);
+ payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources);
for (int i = 0; i < header_size; i++) {
payload[i] = src;
payload[i].width = 8;
@@ -199,15 +207,12 @@ create_copy_instr(fs_visitor *v, fs_inst *inst, fs_reg src, bool negate)
payload[i] = src;
src = offset(src, 1);
}
- copy = v->LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
+ copy = ubld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
} else {
- copy = v->MOV(inst->dst, src);
- copy->force_writemask_all = inst->force_writemask_all;
+ copy = ubld.MOV(inst->dst, src);
copy->src[0].negate = negate;
}
assert(copy->regs_written == written);
-
- return copy;
}
bool
@@ -261,9 +266,8 @@ fs_visitor::opt_cse_local(bblock_t *block)
entry->generator->dst.type,
entry->generator->dst.width);
- fs_inst *copy = create_copy_instr(this, entry->generator,
- entry->tmp, false);
- entry->generator->insert_after(block, copy);
+ create_copy_instr(bld.at(block, entry->generator->next),
+ entry->generator, entry->tmp, false);
entry->generator->dst = entry->tmp;
}
@@ -274,9 +278,7 @@ fs_visitor::opt_cse_local(bblock_t *block)
assert(inst->dst.width == entry->generator->dst.width);
assert(inst->dst.type == entry->tmp.type);
- fs_inst *copy = create_copy_instr(this, inst,
- entry->tmp, negate);
- inst->insert_before(block, copy);
+ create_copy_instr(bld.at(block, inst), inst, entry->tmp, negate);
}
/* Set our iterator so that next time through the loop inst->next
diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
deleted file mode 100644
index 6518ff60c3b..00000000000
--- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
+++ /dev/null
@@ -1,742 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/** @file brw_fs_fp.cpp
- *
- * Implementation of the compiler for GL_ARB_fragment_program shaders on top
- * of the GLSL compiler backend.
- */
-
-#include "brw_context.h"
-#include "brw_fs.h"
-
-void
-fs_visitor::emit_fp_alu1(enum opcode opcode,
- const struct prog_instruction *fpi,
- fs_reg dst, fs_reg src)
-{
- for (int i = 0; i < 4; i++) {
- if (fpi->DstReg.WriteMask & (1 << i))
- emit(opcode, offset(dst, i), offset(src, i));
- }
-}
-
-void
-fs_visitor::emit_fp_alu2(enum opcode opcode,
- const struct prog_instruction *fpi,
- fs_reg dst, fs_reg src0, fs_reg src1)
-{
- for (int i = 0; i < 4; i++) {
- if (fpi->DstReg.WriteMask & (1 << i))
- emit(opcode, offset(dst, i),
- offset(src0, i), offset(src1, i));
- }
-}
-
-void
-fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
- fs_reg dst, fs_reg src0, fs_reg src1)
-{
- enum brw_conditional_mod conditionalmod;
- if (fpi->Opcode == OPCODE_MIN)
- conditionalmod = BRW_CONDITIONAL_L;
- else
- conditionalmod = BRW_CONDITIONAL_GE;
-
- for (int i = 0; i < 4; i++) {
- if (fpi->DstReg.WriteMask & (1 << i)) {
- emit_minmax(conditionalmod, offset(dst, i),
- offset(src0, i), offset(src1, i));
- }
- }
-}
-
-void
-fs_visitor::emit_fp_sop(enum brw_conditional_mod conditional_mod,
- const struct prog_instruction *fpi,
- fs_reg dst, fs_reg src0, fs_reg src1,
- fs_reg one)
-{
- for (int i = 0; i < 4; i++) {
- if (fpi->DstReg.WriteMask & (1 << i)) {
- fs_inst *inst;
-
- emit(CMP(reg_null_d, offset(src0, i), offset(src1, i),
- conditional_mod));
-
- inst = emit(BRW_OPCODE_SEL, offset(dst, i), one, fs_reg(0.0f));
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
- }
-}
-
-void
-fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
- fs_reg dst, fs_reg src)
-{
- for (int i = 0; i < 4; i++) {
- if (fpi->DstReg.WriteMask & (1 << i))
- emit(MOV(offset(dst, i), src));
- }
-}
-
-void
-fs_visitor::emit_fp_scalar_math(enum opcode opcode,
- const struct prog_instruction *fpi,
- fs_reg dst, fs_reg src)
-{
- fs_reg temp = vgrf(glsl_type::float_type);
- emit_math(opcode, temp, src);
- emit_fp_scalar_write(fpi, dst, temp);
-}
-
-void
-fs_visitor::emit_fragment_program_code()
-{
- setup_fp_regs();
-
- /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
- * be:
- *
- * sel.f0 dst 1.0 0.0
- *
- * instead of
- *
- * mov dst 0.0
- * mov.f0 dst 1.0
- */
- fs_reg one = vgrf(glsl_type::float_type);
- emit(MOV(one, fs_reg(1.0f)));
-
- for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
- const struct prog_instruction *fpi = &prog->Instructions[insn];
- base_ir = fpi;
-
- fs_reg dst;
- fs_reg src[3];
-
- /* We always emit into a temporary destination register to avoid
- * aliasing issues.
- */
- dst = vgrf(glsl_type::vec4_type);
-
- for (int i = 0; i < 3; i++)
- src[i] = get_fp_src_reg(&fpi->SrcReg[i]);
-
- switch (fpi->Opcode) {
- case OPCODE_ABS:
- src[0].abs = true;
- src[0].negate = false;
- emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
- break;
-
- case OPCODE_ADD:
- emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
- break;
-
- case OPCODE_CMP:
- for (int i = 0; i < 4; i++) {
- if (fpi->DstReg.WriteMask & (1 << i)) {
- fs_inst *inst;
-
- emit(CMP(reg_null_f, offset(src[0], i), fs_reg(0.0f),
- BRW_CONDITIONAL_L));
-
- inst = emit(BRW_OPCODE_SEL, offset(dst, i),
- offset(src[1], i), offset(src[2], i));
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
- }
- break;
-
- case OPCODE_COS:
- emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
- break;
-
- case OPCODE_DP2:
- case OPCODE_DP3:
- case OPCODE_DP4:
- case OPCODE_DPH: {
- fs_reg mul = vgrf(glsl_type::float_type);
- fs_reg acc = vgrf(glsl_type::float_type);
- int count;
-
- switch (fpi->Opcode) {
- case OPCODE_DP2: count = 2; break;
- case OPCODE_DP3: count = 3; break;
- case OPCODE_DP4: count = 4; break;
- case OPCODE_DPH: count = 3; break;
- default: unreachable("not reached");
- }
-
- emit(MUL(acc, offset(src[0], 0), offset(src[1], 0)));
- for (int i = 1; i < count; i++) {
- emit(MUL(mul, offset(src[0], i), offset(src[1], i)));
- emit(ADD(acc, acc, mul));
- }
-
- if (fpi->Opcode == OPCODE_DPH)
- emit(ADD(acc, acc, offset(src[1], 3)));
-
- emit_fp_scalar_write(fpi, dst, acc);
- break;
- }
-
- case OPCODE_DST:
- if (fpi->DstReg.WriteMask & WRITEMASK_X)
- emit(MOV(dst, fs_reg(1.0f)));
- if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
- emit(MUL(offset(dst, 1),
- offset(src[0], 1), offset(src[1], 1)));
- }
- if (fpi->DstReg.WriteMask & WRITEMASK_Z)
- emit(MOV(offset(dst, 2), offset(src[0], 2)));
- if (fpi->DstReg.WriteMask & WRITEMASK_W)
- emit(MOV(offset(dst, 3), offset(src[1], 3)));
- break;
-
- case OPCODE_EX2:
- emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
- break;
-
- case OPCODE_FLR:
- emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
- break;
-
- case OPCODE_FRC:
- emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
- break;
-
- case OPCODE_KIL: {
- for (int i = 0; i < 4; i++) {
- /* In most cases the argument to a KIL will be something like
- * TEMP[0].wwww, so there's no point in checking whether .w is < 0
- * 4 times in a row.
- */
- if (i > 0 &&
- GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
- GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
- ((fpi->SrcReg[0].Negate >> i) & 1) ==
- ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
- continue;
- }
-
-
- /* Emit an instruction that's predicated on the current
- * undiscarded pixels, and updates just those pixels to be
- * turned off.
- */
- fs_inst *cmp = emit(CMP(reg_null_f, offset(src[0], i),
- fs_reg(0.0f), BRW_CONDITIONAL_GE));
- cmp->predicate = BRW_PREDICATE_NORMAL;
- cmp->flag_subreg = 1;
-
- if (devinfo->gen >= 6)
- emit_discard_jump();
- }
- break;
- }
-
- case OPCODE_LG2:
- emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
- break;
-
- case OPCODE_LIT:
- /* From the ARB_fragment_program spec:
- *
- * tmp = VectorLoad(op0);
- * if (tmp.x < 0) tmp.x = 0;
- * if (tmp.y < 0) tmp.y = 0;
- * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
- * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
- * result.x = 1.0;
- * result.y = tmp.x;
- * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
- * result.w = 1.0;
- *
- * Note that we don't do the clamping to +/- 128. We didn't in
- * brw_wm_emit.c either.
- */
- if (fpi->DstReg.WriteMask & WRITEMASK_X)
- emit(MOV(offset(dst, 0), fs_reg(1.0f)));
-
- if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
- fs_inst *inst;
- emit(CMP(reg_null_f, offset(src[0], 0), fs_reg(0.0f),
- BRW_CONDITIONAL_LE));
-
- if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
- emit(MOV(offset(dst, 1), offset(src[0], 0)));
- inst = emit(MOV(offset(dst, 1), fs_reg(0.0f)));
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
-
- if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
- emit_math(SHADER_OPCODE_POW, offset(dst, 2),
- offset(src[0], 1), offset(src[0], 3));
-
- inst = emit(MOV(offset(dst, 2), fs_reg(0.0f)));
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
- }
-
- if (fpi->DstReg.WriteMask & WRITEMASK_W)
- emit(MOV(offset(dst, 3), fs_reg(1.0f)));
-
- break;
-
- case OPCODE_LRP:
- for (int i = 0; i < 4; i++) {
- if (fpi->DstReg.WriteMask & (1 << i)) {
- fs_reg a = offset(src[0], i);
- fs_reg y = offset(src[1], i);
- fs_reg x = offset(src[2], i);
- emit_lrp(offset(dst, i), x, y, a);
- }
- }
- break;
-
- case OPCODE_MAD:
- for (int i = 0; i < 4; i++) {
- if (fpi->DstReg.WriteMask & (1 << i)) {
- if (devinfo->gen >= 6) {
- emit(MAD(offset(dst, i), offset(src[2], i),
- offset(src[1], i), offset(src[0], i)));
- } else {
- fs_reg temp = vgrf(glsl_type::float_type);
- emit(MUL(temp, offset(src[0], i), offset(src[1], i)));
- emit(ADD(offset(dst, i), temp, offset(src[2], i)));
- }
- }
- }
- break;
-
- case OPCODE_MAX:
- emit_fp_minmax(fpi, dst, src[0], src[1]);
- break;
-
- case OPCODE_MOV:
- emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
- break;
-
- case OPCODE_MIN:
- emit_fp_minmax(fpi, dst, src[0], src[1]);
- break;
-
- case OPCODE_MUL:
- emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
- break;
-
- case OPCODE_POW: {
- fs_reg temp = vgrf(glsl_type::float_type);
- emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
- emit_fp_scalar_write(fpi, dst, temp);
- break;
- }
-
- case OPCODE_RCP:
- emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
- break;
-
- case OPCODE_RSQ:
- emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
- break;
-
- case OPCODE_SCS:
- if (fpi->DstReg.WriteMask & WRITEMASK_X) {
- emit_math(SHADER_OPCODE_COS, offset(dst, 0),
- offset(src[0], 0));
- }
-
- if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
- emit_math(SHADER_OPCODE_SIN, offset(dst, 1),
- offset(src[0], 1));
- }
- break;
-
- case OPCODE_SGE:
- emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
- break;
-
- case OPCODE_SIN:
- emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
- break;
-
- case OPCODE_SLT:
- emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
- break;
-
- case OPCODE_SUB: {
- fs_reg neg_src1 = src[1];
- neg_src1.negate = !src[1].negate;
-
- emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
- break;
- }
-
- case OPCODE_TEX:
- case OPCODE_TXB:
- case OPCODE_TXP: {
- ir_texture_opcode op;
- fs_reg lod;
- fs_reg dpdy;
- fs_reg coordinate = src[0];
- fs_reg shadow_c;
- fs_reg sample_index;
- fs_reg texel_offset; /* No offsets; leave as BAD_FILE. */
-
- switch (fpi->Opcode) {
- case OPCODE_TEX:
- op = ir_tex;
- break;
- case OPCODE_TXP: {
- op = ir_tex;
-
- coordinate = vgrf(glsl_type::vec3_type);
- fs_reg invproj = vgrf(glsl_type::float_type);
- emit_math(SHADER_OPCODE_RCP, invproj, offset(src[0], 3));
- for (int i = 0; i < 3; i++) {
- emit(MUL(offset(coordinate, i),
- offset(src[0], i), invproj));
- }
- break;
- }
- case OPCODE_TXB:
- op = ir_txb;
- lod = offset(src[0], 3);
- break;
- default:
- unreachable("not reached");
- }
-
- int coord_components;
- switch (fpi->TexSrcTarget) {
- case TEXTURE_1D_INDEX:
- coord_components = 1;
- break;
-
- case TEXTURE_2D_INDEX:
- case TEXTURE_1D_ARRAY_INDEX:
- case TEXTURE_RECT_INDEX:
- case TEXTURE_EXTERNAL_INDEX:
- coord_components = 2;
- break;
-
- case TEXTURE_3D_INDEX:
- case TEXTURE_2D_ARRAY_INDEX:
- coord_components = 3;
- break;
-
- case TEXTURE_CUBE_INDEX: {
- coord_components = 3;
-
- fs_reg temp = vgrf(glsl_type::float_type);
- fs_reg cubecoord = vgrf(glsl_type::vec3_type);
- fs_reg abscoord = coordinate;
- abscoord.negate = false;
- abscoord.abs = true;
- emit_minmax(BRW_CONDITIONAL_GE, temp,
- offset(abscoord, 0), offset(abscoord, 1));
- emit_minmax(BRW_CONDITIONAL_GE, temp,
- temp, offset(abscoord, 2));
- emit_math(SHADER_OPCODE_RCP, temp, temp);
- for (int i = 0; i < 3; i++) {
- emit(MUL(offset(cubecoord, i),
- offset(coordinate, i), temp));
- }
-
- coordinate = cubecoord;
- break;
- }
-
- default:
- unreachable("not reached");
- }
-
- if (fpi->TexShadow)
- shadow_c = offset(coordinate, 2);
-
- emit_texture(op, glsl_type::vec4_type, coordinate, coord_components,
- shadow_c, lod, dpdy, 0, sample_index,
- reg_undef, /* offset */
- reg_undef, /* mcs */
- 0, /* gather component */
- false, /* is cube array */
- fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
- fpi->TexSrcUnit, fs_reg(fpi->TexSrcUnit),
- fpi->TexSrcUnit);
- dst = this->result;
-
- break;
- }
-
- case OPCODE_SWZ:
- /* Note that SWZ's extended swizzles are handled in the general
- * get_src_reg() code.
- */
- emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
- break;
-
- case OPCODE_XPD:
- for (int i = 0; i < 3; i++) {
- if (fpi->DstReg.WriteMask & (1 << i)) {
- int i1 = (i + 1) % 3;
- int i2 = (i + 2) % 3;
-
- fs_reg temp = vgrf(glsl_type::float_type);
- fs_reg neg_src1_1 = offset(src[1], i1);
- neg_src1_1.negate = !neg_src1_1.negate;
- emit(MUL(temp, offset(src[0], i2), neg_src1_1));
- emit(MUL(offset(dst, i),
- offset(src[0], i1), offset(src[1], i2)));
- emit(ADD(offset(dst, i), offset(dst, i), temp));
- }
- }
- break;
-
- case OPCODE_END:
- break;
-
- default:
- _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
- _mesa_opcode_string(fpi->Opcode));
- }
-
- /* To handle saturates, we emit a MOV with a saturate bit, which
- * optimization should fold into the preceding instructions when safe.
- */
- if (_mesa_num_inst_dst_regs(fpi->Opcode) != 0) {
- fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);
-
- for (int i = 0; i < 4; i++) {
- if (fpi->DstReg.WriteMask & (1 << i)) {
- fs_inst *inst = emit(MOV(offset(real_dst, i),
- offset(dst, i)));
- inst->saturate = fpi->SaturateMode;
- }
- }
- }
- }
-
- /* Epilogue:
- *
- * Fragment depth has this strange convention of being the .z component of
- * a vec4. emit_fb_write() wants to see a float value, instead.
- */
- this->current_annotation = "result.depth write";
- if (frag_depth.file != BAD_FILE) {
- fs_reg temp = vgrf(glsl_type::float_type);
- emit(MOV(temp, offset(frag_depth, 2)));
- frag_depth = temp;
- }
-}
-
-void
-fs_visitor::setup_fp_regs()
-{
- /* PROGRAM_TEMPORARY */
- int num_temp = prog->NumTemporaries;
- fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
- for (int i = 0; i < num_temp; i++)
- fp_temp_regs[i] = vgrf(glsl_type::vec4_type);
-
- /* PROGRAM_STATE_VAR etc. */
- if (dispatch_width == 8) {
- for (unsigned p = 0;
- p < prog->Parameters->NumParameters; p++) {
- for (unsigned int i = 0; i < 4; i++) {
- stage_prog_data->param[uniforms++] =
- &prog->Parameters->ParameterValues[p][i];
- }
- }
- }
-
- fp_input_regs = rzalloc_array(mem_ctx, fs_reg, VARYING_SLOT_MAX);
- for (int i = 0; i < VARYING_SLOT_MAX; i++) {
- if (prog->InputsRead & BITFIELD64_BIT(i)) {
- this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
- i);
-
- switch (i) {
- case VARYING_SLOT_POS:
- {
- assert(stage == MESA_SHADER_FRAGMENT);
- gl_fragment_program *fp = (gl_fragment_program*) prog;
- fp_input_regs[i] =
- *emit_fragcoord_interpolation(fp->PixelCenterInteger,
- fp->OriginUpperLeft);
- }
- break;
- case VARYING_SLOT_FACE:
- fp_input_regs[i] = *emit_frontfacing_interpolation();
- break;
- default:
- fp_input_regs[i] = vgrf(glsl_type::vec4_type);
- emit_general_interpolation(fp_input_regs[i], "fp_input",
- glsl_type::vec4_type,
- INTERP_QUALIFIER_NONE,
- i, false, false);
-
- if (i == VARYING_SLOT_FOGC) {
- emit(MOV(offset(fp_input_regs[i], 1), fs_reg(0.0f)));
- emit(MOV(offset(fp_input_regs[i], 2), fs_reg(0.0f)));
- emit(MOV(offset(fp_input_regs[i], 3), fs_reg(1.0f)));
- }
-
- break;
- }
-
- this->current_annotation = NULL;
- }
- }
-}
-
-fs_reg
-fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
-{
- assert(stage == MESA_SHADER_FRAGMENT);
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-
- switch (dst->File) {
- case PROGRAM_TEMPORARY:
- return fp_temp_regs[dst->Index];
-
- case PROGRAM_OUTPUT:
- if (dst->Index == FRAG_RESULT_DEPTH) {
- if (frag_depth.file == BAD_FILE)
- frag_depth = vgrf(glsl_type::vec4_type);
- return frag_depth;
- } else if (dst->Index == FRAG_RESULT_COLOR) {
- if (outputs[0].file == BAD_FILE) {
- outputs[0] = vgrf(glsl_type::vec4_type);
- output_components[0] = 4;
-
- /* Tell emit_fb_writes() to smear fragment.color across all the
- * color attachments.
- */
- for (int i = 1; i < key->nr_color_regions; i++) {
- outputs[i] = outputs[0];
- output_components[i] = output_components[0];
- }
- }
- return outputs[0];
- } else {
- int output_index = dst->Index - FRAG_RESULT_DATA0;
- if (outputs[output_index].file == BAD_FILE) {
- outputs[output_index] = vgrf(glsl_type::vec4_type);
- }
- output_components[output_index] = 4;
- return outputs[output_index];
- }
-
- case PROGRAM_UNDEFINED:
- return fs_reg();
-
- default:
- _mesa_problem(ctx, "bad dst register file: %s\n",
- _mesa_register_file_name((gl_register_file)dst->File));
- return vgrf(glsl_type::vec4_type);
- }
-}
-
-fs_reg
-fs_visitor::get_fp_src_reg(const prog_src_register *src)
-{
- struct gl_program_parameter_list *plist = prog->Parameters;
-
- fs_reg result;
-
- assert(!src->Abs);
-
- switch (src->File) {
- case PROGRAM_UNDEFINED:
- return fs_reg();
- case PROGRAM_TEMPORARY:
- result = fp_temp_regs[src->Index];
- break;
-
- case PROGRAM_INPUT:
- result = fp_input_regs[src->Index];
- break;
-
- case PROGRAM_STATE_VAR:
- case PROGRAM_UNIFORM:
- case PROGRAM_CONSTANT:
- /* We actually want to look at the type in the Parameters list for this,
- * because this lets us upload constant builtin uniforms, as actual
- * constants.
- */
- switch (plist->Parameters[src->Index].Type) {
- case PROGRAM_CONSTANT: {
- result = vgrf(glsl_type::vec4_type);
-
- for (int i = 0; i < 4; i++) {
- emit(MOV(offset(result, i),
- fs_reg(plist->ParameterValues[src->Index][i].f)));
- }
- break;
- }
-
- case PROGRAM_STATE_VAR:
- case PROGRAM_UNIFORM:
- result = fs_reg(UNIFORM, src->Index * 4);
- break;
-
- default:
- _mesa_problem(ctx, "bad uniform src register file: %s\n",
- _mesa_register_file_name((gl_register_file)src->File));
- return vgrf(glsl_type::vec4_type);
- }
- break;
-
- default:
- _mesa_problem(ctx, "bad src register file: %s\n",
- _mesa_register_file_name((gl_register_file)src->File));
- return vgrf(glsl_type::vec4_type);
- }
-
- if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
- fs_reg unswizzled = result;
- result = vgrf(glsl_type::vec4_type);
- for (int i = 0; i < 4; i++) {
- bool negate = src->Negate & (1 << i);
- /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
- * but it costs us nothing to support it.
- */
- int src_swiz = GET_SWZ(src->Swizzle, i);
- if (src_swiz == SWIZZLE_ZERO) {
- emit(MOV(offset(result, i), fs_reg(0.0f)));
- } else if (src_swiz == SWIZZLE_ONE) {
- emit(MOV(offset(result, i),
- negate ? fs_reg(-1.0f) : fs_reg(1.0f)));
- } else {
- fs_reg src = offset(unswizzled, src_swiz);
- if (negate)
- src.negate = !src.negate;
- emit(MOV(offset(result, i), src));
- }
- }
- }
-
- return result;
-}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index a99b7f75b26..2ed0bac6fd9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -121,7 +121,7 @@ brw_reg_from_fs_reg(fs_reg *reg)
return brw_reg;
}
-fs_generator::fs_generator(struct brw_context *brw,
+fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
const void *key,
struct brw_stage_prog_data *prog_data,
@@ -130,7 +130,8 @@ fs_generator::fs_generator(struct brw_context *brw,
bool runtime_check_aads_emit,
const char *stage_abbrev)
- : brw(brw), devinfo(brw->intelScreen->devinfo), key(key),
+ : compiler(compiler), log_data(log_data),
+ devinfo(compiler->devinfo), key(key),
prog_data(prog_data),
prog(prog), promoted_constants(promoted_constants),
runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false),
@@ -401,6 +402,13 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload)
}
void
+fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src)
+{
+ brw_barrier(p, src);
+ brw_WAIT(p);
+}
+
+void
fs_generator::generate_blorp_fb_write(fs_inst *inst)
{
brw_fb_WRITE(p,
@@ -779,27 +787,19 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
brw_mark_surface_used(prog_data, sampler + base_binding_table_index);
} else {
/* Non-const sampler index */
- /* Note: this clobbers `dst` as a temporary before emitting the send */
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
- struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));
-
struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_access_mode(p, BRW_ALIGN_1);
- /* Some care required: `sampler` and `temp` may alias:
- * addr = sampler & 0xff
- * temp = (sampler << 8) & 0xf00
- * addr = addr | temp
- */
- brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index));
- brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
- brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
- brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
- brw_OR(p, addr, addr, temp);
+ /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */
+ brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101));
+ if (base_binding_table_index)
+ brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
+ brw_AND(p, addr, addr, brw_imm_ud(0xfff));
brw_pop_insn_state(p);
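
The replaced five-instruction sequence and the new MUL/ADD/AND compute the same packed value; a small standalone check of the arithmetic, independent of the driver. The loop bounds encode the assumption that sampler indices stay below 16 and the combined binding-table index fits in the low byte:

    #include <cassert>
    #include <cstdint>

    /* Old sequence: ADD/SHL/AND/AND/OR, needing a scratch register. */
    static uint32_t pack_old(uint32_t sampler, uint32_t base)
    {
       const uint32_t lo = (sampler + base) & 0x0ff;   /* binding table index */
       const uint32_t hi = (sampler << 8) & 0xf00;     /* sampler state index */
       return lo | hi;
    }

    /* New sequence: one MUL (plus optional ADD) and an AND. */
    static uint32_t pack_new(uint32_t sampler, uint32_t base)
    {
       return ((sampler * 0x101) + base) & 0xfff;
    }

    int main()
    {
       for (uint32_t sampler = 0; sampler < 16; sampler++)
          for (uint32_t base = 0; base + sampler < 256; base++)
             assert(pack_old(sampler, base) == pack_new(sampler, base));
       return 0;
    }
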
@@ -941,6 +941,7 @@ fs_generator::generate_ddy(enum opcode opcode,
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_16);
if (unroll_to_simd8) {
+ brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
if (negate_value) {
brw_ADD(p, firsthalf(dst), firsthalf(src1), negate(firsthalf(src0)));
@@ -1600,10 +1601,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
break;
case 16:
case 32:
- if (type_sz(inst->dst.type) < sizeof(float))
- brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
- else
+ /* If the instruction writes to more than one register, it needs to
+ * be a "compressed" instruction on Gen <= 5.
+ */
+ if (inst->exec_size * inst->dst.stride * type_sz(inst->dst.type) > 32)
brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ else
+ brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
break;
default:
unreachable("Invalid instruction width");
@@ -2121,6 +2125,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
generate_cs_terminate(inst, src[0]);
break;
+ case SHADER_OPCODE_BARRIER:
+ generate_barrier(inst, src[0]);
+ break;
+
default:
unreachable("Unsupported opcode");
@@ -2166,15 +2174,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
ralloc_free(annotation.ann);
}
- static GLuint msg_id = 0;
- _mesa_gl_debug(&brw->ctx, &msg_id,
- MESA_DEBUG_SOURCE_SHADER_COMPILER,
- MESA_DEBUG_TYPE_OTHER,
- MESA_DEBUG_SEVERITY_NOTIFICATION,
- "%s SIMD%d shader: %d inst, %d loops, %d:%d spills:fills, "
- "Promoted %u constants, compacted %d to %d bytes.\n",
- stage_abbrev, dispatch_width, before_size / 16, loop_count,
- spill_count, fill_count, promoted_constants, before_size, after_size);
+ compiler->shader_debug_log(log_data,
+ "%s SIMD%d shader: %d inst, %d loops, "
+ "%d:%d spills:fills, Promoted %u constants, "
+ "compacted %d to %d bytes.\n",
+ stage_abbrev, dispatch_width, before_size / 16,
+ loop_count, spill_count, fill_count,
+ promoted_constants, before_size, after_size);
return start_offset;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 270131a73d1..a378019af5b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -28,6 +28,8 @@
#include "brw_fs.h"
#include "brw_nir.h"
+using namespace brw;
+
void
fs_visitor::emit_nir_code()
{
@@ -38,12 +40,12 @@ fs_visitor::emit_nir_code()
*/
if (nir->num_inputs > 0) {
- nir_inputs = vgrf(nir->num_inputs);
+ nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs);
nir_setup_inputs(nir);
}
if (nir->num_outputs > 0) {
- nir_outputs = vgrf(nir->num_outputs);
+ nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
nir_setup_outputs(nir);
}
@@ -58,7 +60,7 @@ fs_visitor::emit_nir_code()
unsigned array_elems =
reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
unsigned size = array_elems * reg->num_components;
- nir_globals[reg->index] = vgrf(size);
+ nir_globals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size);
}
/* get the main function and emit it */
@@ -93,8 +95,8 @@ fs_visitor::nir_setup_inputs(nir_shader *shader)
unsigned array_length = var->type->is_array() ? var->type->length : 1;
for (unsigned i = 0; i < array_length; i++) {
for (unsigned j = 0; j < components; j++) {
- emit(MOV(retype(offset(input, components * i + j), type),
- offset(fs_reg(ATTR, var->data.location + i, type), j)));
+ bld.MOV(retype(offset(input, components * i + j), type),
+ offset(fs_reg(ATTR, var->data.location + i, type), j));
}
}
break;
@@ -107,7 +109,7 @@ fs_visitor::nir_setup_inputs(nir_shader *shader)
if (var->data.location == VARYING_SLOT_POS) {
reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
var->data.origin_upper_left);
- emit_percomp(MOV(input, reg), 0xF);
+ emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, input, reg), 0xF);
} else {
emit_general_interpolation(input, var->name, var->type,
(glsl_interp_qualifier) var->data.interpolation,
@@ -218,9 +220,12 @@ fs_visitor::nir_setup_uniform(nir_variable *var)
* our name.
*/
unsigned index = var->data.driver_location;
- for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
+ for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
+ if (storage->builtin)
+ continue;
+
if (strncmp(var->name, storage->name, namelen) != 0 ||
(storage->name[namelen] != 0 &&
storage->name[namelen] != '.' &&
@@ -358,7 +363,7 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)
unsigned array_elems =
reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
unsigned size = array_elems * reg->num_components;
- nir_locals[reg->index] = vgrf(size);
+ nir_locals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size);
}
nir_emit_cf_list(&impl->body);
@@ -392,21 +397,21 @@ void
fs_visitor::nir_emit_if(nir_if *if_stmt)
{
/* first, put the condition into f0 */
- fs_inst *inst = emit(MOV(reg_null_d,
+ fs_inst *inst = bld.MOV(bld.null_reg_d(),
retype(get_nir_src(if_stmt->condition),
- BRW_REGISTER_TYPE_D)));
+ BRW_REGISTER_TYPE_D));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
- emit(IF(BRW_PREDICATE_NORMAL));
+ bld.IF(BRW_PREDICATE_NORMAL);
nir_emit_cf_list(&if_stmt->then_list);
/* note: if the else is empty, dead CF elimination will remove it */
- emit(BRW_OPCODE_ELSE);
+ bld.emit(BRW_OPCODE_ELSE);
nir_emit_cf_list(&if_stmt->else_list);
- emit(BRW_OPCODE_ENDIF);
+ bld.emit(BRW_OPCODE_ENDIF);
if (!try_replace_with_sel() && devinfo->gen < 6) {
no16("Can't support (non-uniform) control flow on SIMD16\n");
@@ -420,11 +425,11 @@ fs_visitor::nir_emit_loop(nir_loop *loop)
no16("Can't support (non-uniform) control flow on SIMD16\n");
}
- emit(BRW_OPCODE_DO);
+ bld.emit(BRW_OPCODE_DO);
nir_emit_cf_list(&loop->body);
- emit(BRW_OPCODE_WHILE);
+ bld.emit(BRW_OPCODE_WHILE);
}
void
@@ -438,19 +443,19 @@ fs_visitor::nir_emit_block(nir_block *block)
void
fs_visitor::nir_emit_instr(nir_instr *instr)
{
- this->base_ir = instr;
+ const fs_builder abld = bld.annotate(NULL, instr);
switch (instr->type) {
case nir_instr_type_alu:
- nir_emit_alu(nir_instr_as_alu(instr));
+ nir_emit_alu(abld, nir_instr_as_alu(instr));
break;
case nir_instr_type_intrinsic:
- nir_emit_intrinsic(nir_instr_as_intrinsic(instr));
+ nir_emit_intrinsic(abld, nir_instr_as_intrinsic(instr));
break;
case nir_instr_type_tex:
- nir_emit_texture(nir_instr_as_tex(instr));
+ nir_emit_texture(abld, nir_instr_as_tex(instr));
break;
case nir_instr_type_load_const:
@@ -460,14 +465,12 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
break;
case nir_instr_type_jump:
- nir_emit_jump(nir_instr_as_jump(instr));
+ nir_emit_jump(abld, nir_instr_as_jump(instr));
break;
default:
unreachable("unknown instruction type");
}
-
- this->base_ir = NULL;
}
static brw_reg_type
@@ -540,7 +543,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
tmp.subreg_offset = 2;
tmp.stride = 2;
- fs_inst *or_inst = emit(OR(tmp, g0, fs_reg(0x3f80)));
+ fs_inst *or_inst = bld.OR(tmp, g0, fs_reg(0x3f80));
or_inst->src[1].type = BRW_REGISTER_TYPE_UW;
tmp.type = BRW_REGISTER_TYPE_D;
@@ -565,15 +568,15 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
g1_6.negate = true;
}
- emit(OR(tmp, g1_6, fs_reg(0x3f800000)));
+ bld.OR(tmp, g1_6, fs_reg(0x3f800000));
}
- emit(AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf800000)));
+ bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf800000));
return true;
}
void
-fs_visitor::nir_emit_alu(nir_alu_instr *instr)
+fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
{
struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
fs_inst *inst;
@@ -605,7 +608,7 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
if (!instr->src[i].src.is_ssa &&
instr->dest.dest.reg.reg == instr->src[i].src.reg.reg) {
need_extra_copy = true;
- temp = retype(vgrf(4), result.type);
+ temp = bld.vgrf(result.type, 4);
break;
}
}
@@ -615,11 +618,11 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
continue;
if (instr->op == nir_op_imov || instr->op == nir_op_fmov) {
- inst = emit(MOV(offset(temp, i),
- offset(op[0], instr->src[0].swizzle[i])));
+ inst = bld.MOV(offset(temp, i),
+ offset(op[0], instr->src[0].swizzle[i]));
} else {
- inst = emit(MOV(offset(temp, i),
- offset(op[i], instr->src[i].swizzle[0])));
+ inst = bld.MOV(offset(temp, i),
+ offset(op[i], instr->src[i].swizzle[0]));
}
inst->saturate = instr->dest.saturate;
}
@@ -633,7 +636,7 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
if (!(instr->dest.write_mask & (1 << i)))
continue;
- emit(MOV(offset(result, i), offset(temp, i)));
+ bld.MOV(offset(result, i), offset(temp, i));
}
}
return;
@@ -665,13 +668,13 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
switch (instr->op) {
case nir_op_i2f:
case nir_op_u2f:
- inst = emit(MOV(result, op[0]));
+ inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_f2i:
case nir_op_f2u:
- emit(MOV(result, op[0]));
+ bld.MOV(result, op[0]);
break;
case nir_op_fsign: {
@@ -680,17 +683,17 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
* Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
* zero.
*/
- emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
+ bld.CMP(bld.null_reg_f(), op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ);
fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
op[0].type = BRW_REGISTER_TYPE_UD;
result.type = BRW_REGISTER_TYPE_UD;
- emit(AND(result_int, op[0], fs_reg(0x80000000u)));
+ bld.AND(result_int, op[0], fs_reg(0x80000000u));
- inst = emit(OR(result_int, result_int, fs_reg(0x3f800000u)));
+ inst = bld.OR(result_int, result_int, fs_reg(0x3f800000u));
inst->predicate = BRW_PREDICATE_NORMAL;
if (instr->dest.saturate) {
- inst = emit(MOV(result, result));
+ inst = bld.MOV(result, result);
inst->saturate = true;
}
break;
@@ -701,120 +704,88 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
* -> non-negative val generates 0x00000000.
* Predicated OR sets 1 if val is positive.
*/
- emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G));
- emit(ASR(result, op[0], fs_reg(31)));
- inst = emit(OR(result, result, fs_reg(1)));
+ bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_G);
+ bld.ASR(result, op[0], fs_reg(31));
+ inst = bld.OR(result, result, fs_reg(1));
inst->predicate = BRW_PREDICATE_NORMAL;
break;
case nir_op_frcp:
- inst = emit_math(SHADER_OPCODE_RCP, result, op[0]);
+ inst = bld.emit(SHADER_OPCODE_RCP, result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fexp2:
- inst = emit_math(SHADER_OPCODE_EXP2, result, op[0]);
+ inst = bld.emit(SHADER_OPCODE_EXP2, result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_flog2:
- inst = emit_math(SHADER_OPCODE_LOG2, result, op[0]);
+ inst = bld.emit(SHADER_OPCODE_LOG2, result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fsin:
- inst = emit_math(SHADER_OPCODE_SIN, result, op[0]);
+ inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fcos:
- inst = emit_math(SHADER_OPCODE_COS, result, op[0]);
+ inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fddx:
if (fs_key->high_quality_derivatives) {
- inst = emit(FS_OPCODE_DDX_FINE, result, op[0]);
+ inst = bld.emit(FS_OPCODE_DDX_FINE, result, op[0]);
} else {
- inst = emit(FS_OPCODE_DDX_COARSE, result, op[0]);
+ inst = bld.emit(FS_OPCODE_DDX_COARSE, result, op[0]);
}
inst->saturate = instr->dest.saturate;
break;
case nir_op_fddx_fine:
- inst = emit(FS_OPCODE_DDX_FINE, result, op[0]);
+ inst = bld.emit(FS_OPCODE_DDX_FINE, result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fddx_coarse:
- inst = emit(FS_OPCODE_DDX_COARSE, result, op[0]);
+ inst = bld.emit(FS_OPCODE_DDX_COARSE, result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fddy:
if (fs_key->high_quality_derivatives) {
- inst = emit(FS_OPCODE_DDY_FINE, result, op[0],
- fs_reg(fs_key->render_to_fbo));
+ inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0],
+ fs_reg(fs_key->render_to_fbo));
} else {
- inst = emit(FS_OPCODE_DDY_COARSE, result, op[0],
- fs_reg(fs_key->render_to_fbo));
+ inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0],
+ fs_reg(fs_key->render_to_fbo));
}
inst->saturate = instr->dest.saturate;
break;
case nir_op_fddy_fine:
- inst = emit(FS_OPCODE_DDY_FINE, result, op[0],
- fs_reg(fs_key->render_to_fbo));
+ inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0],
+ fs_reg(fs_key->render_to_fbo));
inst->saturate = instr->dest.saturate;
break;
case nir_op_fddy_coarse:
- inst = emit(FS_OPCODE_DDY_COARSE, result, op[0],
- fs_reg(fs_key->render_to_fbo));
+ inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0],
+ fs_reg(fs_key->render_to_fbo));
inst->saturate = instr->dest.saturate;
break;
case nir_op_fadd:
case nir_op_iadd:
- inst = emit(ADD(result, op[0], op[1]));
+ inst = bld.ADD(result, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fmul:
- inst = emit(MUL(result, op[0], op[1]));
+ inst = bld.MUL(result, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;
- case nir_op_imul: {
- if (devinfo->gen >= 8) {
- emit(MUL(result, op[0], op[1]));
- break;
- } else {
- nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src);
- nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
-
- if (value0 && value0->u[0] < (1 << 16)) {
- if (devinfo->gen < 7) {
- emit(MUL(result, op[0], op[1]));
- } else {
- emit(MUL(result, op[1], op[0]));
- }
- break;
- } else if (value1 && value1->u[0] < (1 << 16)) {
- if (devinfo->gen < 7) {
- emit(MUL(result, op[1], op[0]));
- } else {
- emit(MUL(result, op[0], op[1]));
- }
- break;
- }
- }
-
- if (devinfo->gen >= 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
-
- struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);
-
- emit(MUL(acc, op[0], op[1]));
- emit(MACH(reg_null_d, op[0], op[1]));
- emit(MOV(result, fs_reg(acc)));
+ case nir_op_imul:
+ bld.MUL(result, op[0], op[1]);
break;
- }
case nir_op_imul_high:
case nir_op_umul_high: {
@@ -823,8 +794,8 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);
- fs_inst *mul = emit(MUL(acc, op[0], op[1]));
- emit(MACH(result, op[0], op[1]));
+ fs_inst *mul = bld.MUL(acc, op[0], op[1]);
+ bld.MACH(result, op[0], op[1]);
/* Until Gen8, integer multiplies read 32-bits from one source, and
* 16-bits from the other, and relying on the MACH instruction to
@@ -852,7 +823,7 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_idiv:
case nir_op_udiv:
- emit_math(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]);
+ bld.emit(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]);
break;
case nir_op_uadd_carry: {
@@ -862,8 +833,8 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
BRW_REGISTER_TYPE_UD);
- emit(ADDC(reg_null_ud, op[0], op[1]));
- emit(MOV(result, fs_reg(acc)));
+ bld.ADDC(bld.null_reg_ud(), op[0], op[1]);
+ bld.MOV(result, fs_reg(acc));
break;
}
@@ -874,63 +845,63 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
BRW_REGISTER_TYPE_UD);
- emit(SUBB(reg_null_ud, op[0], op[1]));
- emit(MOV(result, fs_reg(acc)));
+ bld.SUBB(bld.null_reg_ud(), op[0], op[1]);
+ bld.MOV(result, fs_reg(acc));
break;
}
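
For the two accumulator-based cases above, the arithmetic being modelled is just the carry-out and borrow-out of an unsigned 32-bit add and subtract; a reference version in plain C++ (names invented):

#include <cstdint>

static uint32_t uadd_carry_model(uint32_t a, uint32_t b)
{
   /* ADDC leaves the carry in the accumulator; the MOV copies it out. */
   return (uint32_t)(((uint64_t)a + b) >> 32);   /* 1 on carry, else 0 */
}

static uint32_t usub_borrow_model(uint32_t a, uint32_t b)
{
   /* SUBB leaves the borrow in the accumulator. */
   return a < b ? 1u : 0u;                       /* 1 on borrow, else 0 */
}
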
case nir_op_umod:
- emit_math(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]);
+ bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]);
break;
case nir_op_flt:
case nir_op_ilt:
case nir_op_ult:
- emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_L));
+ bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_L);
break;
case nir_op_fge:
case nir_op_ige:
case nir_op_uge:
- emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_GE));
+ bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_GE);
break;
case nir_op_feq:
case nir_op_ieq:
- emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_Z));
+ bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_Z);
break;
case nir_op_fne:
case nir_op_ine:
- emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ));
+ bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ);
break;
case nir_op_inot:
if (devinfo->gen >= 8) {
resolve_source_modifiers(&op[0]);
}
- emit(NOT(result, op[0]));
+ bld.NOT(result, op[0]);
break;
case nir_op_ixor:
if (devinfo->gen >= 8) {
resolve_source_modifiers(&op[0]);
resolve_source_modifiers(&op[1]);
}
- emit(XOR(result, op[0], op[1]));
+ bld.XOR(result, op[0], op[1]);
break;
case nir_op_ior:
if (devinfo->gen >= 8) {
resolve_source_modifiers(&op[0]);
resolve_source_modifiers(&op[1]);
}
- emit(OR(result, op[0], op[1]));
+ bld.OR(result, op[0], op[1]);
break;
case nir_op_iand:
if (devinfo->gen >= 8) {
resolve_source_modifiers(&op[0]);
resolve_source_modifiers(&op[1]);
}
- emit(AND(result, op[0], op[1]));
+ bld.AND(result, op[0], op[1]);
break;
case nir_op_fdot2:
@@ -978,53 +949,53 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
unreachable("not reached: should be handled by ldexp_to_arith()");
case nir_op_fsqrt:
- inst = emit_math(SHADER_OPCODE_SQRT, result, op[0]);
+ inst = bld.emit(SHADER_OPCODE_SQRT, result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_frsq:
- inst = emit_math(SHADER_OPCODE_RSQ, result, op[0]);
+ inst = bld.emit(SHADER_OPCODE_RSQ, result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_b2i:
- emit(AND(result, op[0], fs_reg(1)));
+ bld.AND(result, op[0], fs_reg(1));
break;
case nir_op_b2f:
- emit(AND(retype(result, BRW_REGISTER_TYPE_UD), op[0], fs_reg(0x3f800000u)));
+ bld.AND(retype(result, BRW_REGISTER_TYPE_UD), op[0], fs_reg(0x3f800000u));
break;
case nir_op_f2b:
- emit(CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
+ bld.CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ);
break;
case nir_op_i2b:
- emit(CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
+ bld.CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ);
break;
case nir_op_ftrunc:
- inst = emit(RNDZ(result, op[0]));
+ inst = bld.RNDZ(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fceil: {
op[0].negate = !op[0].negate;
fs_reg temp = vgrf(glsl_type::float_type);
- emit(RNDD(temp, op[0]));
+ bld.RNDD(temp, op[0]);
temp.negate = true;
- inst = emit(MOV(result, temp));
+ inst = bld.MOV(result, temp);
inst->saturate = instr->dest.saturate;
break;
}
case nir_op_ffloor:
- inst = emit(RNDD(result, op[0]));
+ inst = bld.RNDD(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_ffract:
- inst = emit(FRC(result, op[0]));
+ inst = bld.FRC(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fround_even:
- inst = emit(RNDE(result, op[0]));
+ inst = bld.RNDE(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
@@ -1032,11 +1003,11 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_imin:
case nir_op_umin:
if (devinfo->gen >= 6) {
- inst = emit(BRW_OPCODE_SEL, result, op[0], op[1]);
+ inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
inst->conditional_mod = BRW_CONDITIONAL_L;
} else {
- emit(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_L));
- inst = emit(SEL(result, op[0], op[1]));
+ bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_L);
+ inst = bld.SEL(result, op[0], op[1]);
inst->predicate = BRW_PREDICATE_NORMAL;
}
inst->saturate = instr->dest.saturate;
@@ -1046,11 +1017,11 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_imax:
case nir_op_umax:
if (devinfo->gen >= 6) {
- inst = emit(BRW_OPCODE_SEL, result, op[0], op[1]);
+ inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
inst->conditional_mod = BRW_CONDITIONAL_GE;
} else {
- emit(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_GE));
- inst = emit(SEL(result, op[0], op[1]));
+ bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_GE);
+ inst = bld.SEL(result, op[0], op[1]);
inst->predicate = BRW_PREDICATE_NORMAL;
}
inst->saturate = instr->dest.saturate;
@@ -1069,57 +1040,57 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
unreachable("not reached: should be handled by lower_packing_builtins");
case nir_op_unpack_half_2x16_split_x:
- inst = emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0]);
+ inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_unpack_half_2x16_split_y:
- inst = emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0]);
+ inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fpow:
- inst = emit_math(SHADER_OPCODE_POW, result, op[0], op[1]);
+ inst = bld.emit(SHADER_OPCODE_POW, result, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_bitfield_reverse:
- emit(BFREV(result, op[0]));
+ bld.BFREV(result, op[0]);
break;
case nir_op_bit_count:
- emit(CBIT(result, op[0]));
+ bld.CBIT(result, op[0]);
break;
case nir_op_ufind_msb:
case nir_op_ifind_msb: {
- emit(FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]));
+ bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
/* FBH counts from the MSB side, while GLSL's findMSB() wants the count
* from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
* subtract the result from 31 to convert the MSB count into an LSB count.
*/
- emit(CMP(reg_null_d, result, fs_reg(-1), BRW_CONDITIONAL_NZ));
+ bld.CMP(bld.null_reg_d(), result, fs_reg(-1), BRW_CONDITIONAL_NZ);
fs_reg neg_result(result);
neg_result.negate = true;
- inst = emit(ADD(result, neg_result, fs_reg(31)));
+ inst = bld.ADD(result, neg_result, fs_reg(31));
inst->predicate = BRW_PREDICATE_NORMAL;
break;
}
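
The comment above describes the whole trick; as a reference, the MSB-to-LSB index conversion with the error value passed through looks like this (sketch only):

#include <cstdint>

static int32_t find_msb_model(int32_t fbh_result)
{
   /* FBH returns a count from the MSB side, or -1 (0xFFFFFFFF) if no bit
    * was found; the predicated ADD leaves -1 untouched and converts every
    * other value into an LSB-relative bit index.
    */
   return fbh_result == -1 ? -1 : 31 - fbh_result;
}
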
case nir_op_find_lsb:
- emit(FBL(result, op[0]));
+ bld.FBL(result, op[0]);
break;
case nir_op_ubitfield_extract:
case nir_op_ibitfield_extract:
- emit(BFE(result, op[2], op[1], op[0]));
+ bld.BFE(result, op[2], op[1], op[0]);
break;
case nir_op_bfm:
- emit(BFI1(result, op[0], op[1]));
+ bld.BFI1(result, op[0], op[1]);
break;
case nir_op_bfi:
- emit(BFI2(result, op[0], op[1], op[2]));
+ bld.BFI2(result, op[0], op[1], op[2]);
break;
case nir_op_bitfield_insert:
@@ -1127,26 +1098,26 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
"lower_instructions::bitfield_insert_to_bfm_bfi");
case nir_op_ishl:
- emit(SHL(result, op[0], op[1]));
+ bld.SHL(result, op[0], op[1]);
break;
case nir_op_ishr:
- emit(ASR(result, op[0], op[1]));
+ bld.ASR(result, op[0], op[1]);
break;
case nir_op_ushr:
- emit(SHR(result, op[0], op[1]));
+ bld.SHR(result, op[0], op[1]);
break;
case nir_op_pack_half_2x16_split:
- emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]);
+ bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]);
break;
case nir_op_ffma:
- inst = emit(MAD(result, op[2], op[1], op[0]));
+ inst = bld.MAD(result, op[2], op[1], op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_flrp:
- inst = emit_lrp(result, op[0], op[1], op[2]);
+ inst = bld.LRP(result, op[0], op[1], op[2]);
inst->saturate = instr->dest.saturate;
break;
@@ -1154,8 +1125,8 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
if (optimize_frontfacing_ternary(instr, result))
return;
- emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
- inst = emit(SEL(result, op[1], op[2]));
+ bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_NZ);
+ inst = bld.SEL(result, op[1], op[2]);
inst->predicate = BRW_PREDICATE_NORMAL;
break;
@@ -1169,9 +1140,9 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
if (devinfo->gen <= 5 &&
(instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == BRW_NIR_BOOLEAN_NEEDS_RESOLVE) {
fs_reg masked = vgrf(glsl_type::int_type);
- emit(AND(masked, result, fs_reg(1)));
+ bld.AND(masked, result, fs_reg(1));
masked.negate = true;
- emit(MOV(retype(result, BRW_REGISTER_TYPE_D), masked));
+ bld.MOV(retype(result, BRW_REGISTER_TYPE_D), masked);
}
}
@@ -1190,8 +1161,8 @@ fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg,
int multiplier = nir_reg->num_components * (v->dispatch_width / 8);
reg.reladdr = new(v->mem_ctx) fs_reg(v->vgrf(glsl_type::int_type));
- v->emit(v->MUL(*reg.reladdr, v->get_nir_src(*indirect),
- fs_reg(multiplier)));
+ v->bld.MUL(*reg.reladdr, v->get_nir_src(*indirect),
+ fs_reg(multiplier));
}
return reg;
@@ -1203,11 +1174,10 @@ fs_visitor::get_nir_src(nir_src src)
if (src.is_ssa) {
assert(src.ssa->parent_instr->type == nir_instr_type_load_const);
nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
- fs_reg reg = vgrf(src.ssa->num_components);
- reg.type = BRW_REGISTER_TYPE_D;
+ fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, src.ssa->num_components);
for (unsigned i = 0; i < src.ssa->num_components; ++i)
- emit(MOV(offset(reg, i), fs_reg(load->value.i[i])));
+ bld.MOV(offset(reg, i), fs_reg(load->value.i[i]));
return reg;
} else {
@@ -1230,24 +1200,25 @@ fs_visitor::get_nir_dest(nir_dest dest)
}
void
-fs_visitor::emit_percomp(fs_inst *inst, unsigned wr_mask)
+fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
+ unsigned wr_mask)
{
for (unsigned i = 0; i < 4; i++) {
if (!((wr_mask >> i) & 1))
continue;
- fs_inst *new_inst = new(mem_ctx) fs_inst(*inst);
+ fs_inst *new_inst = new(mem_ctx) fs_inst(inst);
new_inst->dst = offset(new_inst->dst, i);
for (unsigned j = 0; j < new_inst->sources; j++)
- if (inst->src[j].file == GRF)
+ if (new_inst->src[j].file == GRF)
new_inst->src[j] = offset(new_inst->src[j], i);
- emit(new_inst);
+ bld.emit(new_inst);
}
}
void
-fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
+fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
{
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
@@ -1265,12 +1236,12 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
*/
fs_inst *cmp;
if (instr->intrinsic == nir_intrinsic_discard_if) {
- cmp = emit(CMP(reg_null_f, get_nir_src(instr->src[0]),
- fs_reg(0), BRW_CONDITIONAL_Z));
+ cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]),
+ fs_reg(0), BRW_CONDITIONAL_Z);
} else {
fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
BRW_REGISTER_TYPE_UW));
- cmp = emit(CMP(reg_null_f, some_reg, some_reg, BRW_CONDITIONAL_NZ));
+ cmp = bld.CMP(bld.null_reg_f(), some_reg, some_reg, BRW_CONDITIONAL_NZ);
}
cmp->predicate = BRW_PREDICATE_NORMAL;
cmp->flag_subreg = 1;
@@ -1307,8 +1278,8 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
}
case nir_intrinsic_load_front_face:
- emit(MOV(retype(dest, BRW_REGISTER_TYPE_D),
- *emit_frontfacing_interpolation()));
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
+ *emit_frontfacing_interpolation());
break;
case nir_intrinsic_load_vertex_id:
@@ -1318,7 +1289,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
fs_reg vertex_id = nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
assert(vertex_id.file != BAD_FILE);
dest.type = vertex_id.type;
- emit(MOV(dest, vertex_id));
+ bld.MOV(dest, vertex_id);
break;
}
@@ -1326,7 +1297,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
assert(base_vertex.file != BAD_FILE);
dest.type = base_vertex.type;
- emit(MOV(dest, base_vertex));
+ bld.MOV(dest, base_vertex);
break;
}
@@ -1334,7 +1305,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
assert(instance_id.file != BAD_FILE);
dest.type = instance_id.type;
- emit(MOV(dest, instance_id));
+ bld.MOV(dest, instance_id);
break;
}
@@ -1342,7 +1313,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
fs_reg sample_mask_in = nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
assert(sample_mask_in.file != BAD_FILE);
dest.type = sample_mask_in.type;
- emit(MOV(dest, sample_mask_in));
+ bld.MOV(dest, sample_mask_in);
break;
}
@@ -1350,8 +1321,8 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
assert(sample_pos.file != BAD_FILE);
dest.type = sample_pos.type;
- emit(MOV(dest, sample_pos));
- emit(MOV(offset(dest, 1), offset(sample_pos, 1)));
+ bld.MOV(dest, sample_pos);
+ bld.MOV(offset(dest, 1), offset(sample_pos, 1));
break;
}
@@ -1359,7 +1330,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
fs_reg sample_id = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
assert(sample_id.file != BAD_FILE);
dest.type = sample_id.type;
- emit(MOV(dest, sample_id));
+ bld.MOV(dest, sample_id);
break;
}
@@ -1377,16 +1348,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
index -= num_direct_uniforms;
}
- for (int i = 0; i < instr->const_index[1]; i++) {
- for (unsigned j = 0; j < instr->num_components; j++) {
- fs_reg src = offset(retype(uniform_reg, dest.type), index);
- if (has_indirect)
- src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
- index++;
+ for (unsigned j = 0; j < instr->num_components; j++) {
+ fs_reg src = offset(retype(uniform_reg, dest.type), index);
+ if (has_indirect)
+ src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
+ index++;
- emit(MOV(dest, src));
- dest = offset(dest, 1);
- }
+ bld.MOV(dest, src);
+ dest = offset(dest, 1);
}
break;
}
@@ -1417,9 +1386,9 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
* from any live channel.
*/
surf_index = vgrf(glsl_type::uint_type);
- emit(ADD(surf_index, get_nir_src(instr->src[0]),
- fs_reg(stage_prog_data->binding_table.ubo_start)));
- emit_uniformize(surf_index, surf_index);
+ bld.ADD(surf_index, get_nir_src(instr->src[0]),
+ fs_reg(stage_prog_data->binding_table.ubo_start));
+ bld.emit_uniformize(surf_index, surf_index);
/* Assume this may touch any UBO. It would be nice to provide
* a tighter bound, but the array information is already lowered away.
@@ -1432,21 +1401,21 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
if (has_indirect) {
/* Turn the byte offset into a dword offset. */
fs_reg base_offset = vgrf(glsl_type::int_type);
- emit(SHR(base_offset, retype(get_nir_src(instr->src[1]),
- BRW_REGISTER_TYPE_D),
- fs_reg(2)));
+ bld.SHR(base_offset, retype(get_nir_src(instr->src[1]),
+ BRW_REGISTER_TYPE_D),
+ fs_reg(2));
unsigned vec4_offset = instr->const_index[0] / 4;
for (int i = 0; i < instr->num_components; i++)
- emit(VARYING_PULL_CONSTANT_LOAD(offset(dest, i), surf_index,
- base_offset, vec4_offset + i));
+ VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, i), surf_index,
+ base_offset, vec4_offset + i);
} else {
fs_reg packed_consts = vgrf(glsl_type::float_type);
packed_consts.type = dest.type;
fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15);
- emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
- surf_index, const_offset_reg);
+ bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
+ surf_index, const_offset_reg);
for (unsigned i = 0; i < instr->num_components; i++) {
packed_consts.set_smear(instr->const_index[0] % 16 / 4 + i);
@@ -1456,7 +1425,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
*/
assert(packed_consts.subreg_offset < 32);
- emit(MOV(dest, packed_consts));
+ bld.MOV(dest, packed_consts);
dest = offset(dest, 1);
}
}
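
The constant-offset UBO path above fetches a 16-byte aligned block and then smears one dword per component out of it. A worked example with made-up numbers (byte offset 20, two components), mirroring the & ~15 and % 16 / 4 arithmetic in the hunk:

#include <cstdio>

int main()
{
   unsigned const_index0 = 20;      /* hypothetical byte offset into the UBO */
   unsigned num_components = 2;

   unsigned block_base = const_index0 & ~15u;        /* -> 16                */
   for (unsigned i = 0; i < num_components; i++) {
      unsigned smear = const_index0 % 16 / 4 + i;    /* -> 1, then 2         */
      printf("component %u reads dword %u of the block at byte %u\n",
             i, smear, block_base);
   }
   return 0;
}
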
@@ -1468,17 +1437,15 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
/* fallthrough */
case nir_intrinsic_load_input: {
unsigned index = 0;
- for (int i = 0; i < instr->const_index[1]; i++) {
- for (unsigned j = 0; j < instr->num_components; j++) {
- fs_reg src = offset(retype(nir_inputs, dest.type),
- instr->const_index[0] + index);
- if (has_indirect)
- src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
- index++;
-
- emit(MOV(dest, src));
- dest = offset(dest, 1);
- }
+ for (unsigned j = 0; j < instr->num_components; j++) {
+ fs_reg src = offset(retype(nir_inputs, dest.type),
+ instr->const_index[0] + index);
+ if (has_indirect)
+ src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
+ index++;
+
+ bld.MOV(dest, src);
+ dest = offset(dest, 1);
}
break;
}
@@ -1510,7 +1477,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
*/
no16("interpolate_at_* not yet supported in SIMD16 mode.");
- fs_reg dst_xy = vgrf(2);
+ fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
/* For most messages, we need one reg of ignored data; the hardware
* requires mlen==1 even when there is no payload. in the per-slot
@@ -1522,7 +1489,8 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
switch (instr->intrinsic) {
case nir_intrinsic_interp_var_at_centroid:
- inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u));
+ inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID,
+ dst_xy, src, fs_reg(0u));
break;
case nir_intrinsic_interp_var_at_sample: {
@@ -1530,8 +1498,8 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
assert(const_sample);
unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
- inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
- fs_reg(msg_data));
+ inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
+ fs_reg(msg_data));
break;
}
@@ -1542,17 +1510,17 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
- inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
- fs_reg(off_x | (off_y << 4)));
+ inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
+ fs_reg(off_x | (off_y << 4)));
} else {
src = vgrf(glsl_type::ivec2_type);
fs_reg offset_src = retype(get_nir_src(instr->src[0]),
BRW_REGISTER_TYPE_F);
for (int i = 0; i < 2; i++) {
fs_reg temp = vgrf(glsl_type::float_type);
- emit(MUL(temp, offset(offset_src, i), fs_reg(16.0f)));
+ bld.MUL(temp, offset(offset_src, i), fs_reg(16.0f));
fs_reg itemp = vgrf(glsl_type::int_type);
- emit(MOV(itemp, temp)); /* float to int */
+ bld.MOV(itemp, temp); /* float to int */
/* Clamp the upper end of the range to +7/16.
* ARB_gpu_shader5 requires that we support a maximum offset
@@ -1569,14 +1537,13 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
* implementation-dependent constant
* FRAGMENT_INTERPOLATION_OFFSET_BITS"
*/
-
- emit(BRW_OPCODE_SEL, offset(src, i), itemp, fs_reg(7))
- ->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */
+ set_condmod(BRW_CONDITIONAL_L,
+ bld.SEL(offset(src, i), itemp, fs_reg(7)));
}
mlen = 2;
- inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
- fs_reg(0u));
+ inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
+ fs_reg(0u));
}
break;
}
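
The clamping discussed in the comment above packs a float offset into the interpolator's signed S0.4 fixed-point field; a standalone model of that packing (helper name is mine; the same arithmetic appears as pack_pixel_offset further down in the removed visitor code):

static unsigned pack_interp_offset_model(float x)
{
   int n = (int)(x * 16.0f);   /* scale to 1/16ths                          */
   if (n > 7)
      n = 7;                   /* clamp the upper end to +7/16, per the
                                * ARB_gpu_shader5 quantization rules        */
   return n & 0xf;             /* keep the 4-bit two's complement field     */
}
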
@@ -1594,7 +1561,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
src.type = dest.type;
- emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
+ bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
dest = offset(dest, 1);
}
break;
@@ -1606,27 +1573,29 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
case nir_intrinsic_store_output: {
fs_reg src = get_nir_src(instr->src[0]);
unsigned index = 0;
- for (int i = 0; i < instr->const_index[1]; i++) {
- for (unsigned j = 0; j < instr->num_components; j++) {
- fs_reg new_dest = offset(retype(nir_outputs, src.type),
- instr->const_index[0] + index);
- if (has_indirect)
- src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
- index++;
- emit(MOV(new_dest, src));
- src = offset(src, 1);
- }
+ for (unsigned j = 0; j < instr->num_components; j++) {
+ fs_reg new_dest = offset(retype(nir_outputs, src.type),
+ instr->const_index[0] + index);
+ if (has_indirect)
+ src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
+ index++;
+ bld.MOV(new_dest, src);
+ src = offset(src, 1);
}
break;
}
+ case nir_intrinsic_barrier:
+ emit_barrier();
+ break;
+
default:
unreachable("unknown intrinsic");
}
}
void
-fs_visitor::nir_emit_texture(nir_tex_instr *instr)
+fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
{
uint32_t set = instr->sampler_set;
uint32_t binding = instr->sampler_index;
@@ -1650,7 +1619,8 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr)
bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
instr->is_array;
- int lod_components = 0, offset_components = 0;
+ int lod_components = 0;
+ int UNUSED offset_components = 0;
fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, tex_offset;
@@ -1719,8 +1689,8 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr)
/* Emit code to evaluate the actual indexing expression */
sampler_reg = vgrf(glsl_type::uint_type);
- emit(ADD(sampler_reg, src, fs_reg(sampler)));
- emit_uniformize(sampler_reg, sampler_reg);
+ bld.ADD(sampler_reg, src, fs_reg(sampler));
+ bld.emit_uniformize(sampler_reg, sampler_reg);
break;
}
@@ -1789,18 +1759,19 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr)
fs_reg dest = get_nir_dest(instr->dest);
dest.type = this->result.type;
unsigned num_components = nir_tex_instr_dest_size(instr);
- emit_percomp(MOV(dest, this->result), (1 << num_components) - 1);
+ emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, dest, this->result),
+ (1 << num_components) - 1);
}
void
-fs_visitor::nir_emit_jump(nir_jump_instr *instr)
+fs_visitor::nir_emit_jump(const fs_builder &bld, nir_jump_instr *instr)
{
switch (instr->type) {
case nir_jump_break:
- emit(BRW_OPCODE_BREAK);
+ bld.emit(BRW_OPCODE_BREAK);
break;
case nir_jump_continue:
- emit(BRW_OPCODE_CONTINUE);
+ bld.emit(BRW_OPCODE_CONTINUE);
break;
case nir_jump_return:
default:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp
index cf3da7b1882..d92d4bbd81d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp
@@ -85,9 +85,9 @@ fs_visitor::opt_peephole_predicated_break()
* instruction to set the flag register.
*/
if (devinfo->gen == 6 && if_inst->conditional_mod) {
- fs_inst *cmp_inst = CMP(reg_null_d, if_inst->src[0], if_inst->src[1],
- if_inst->conditional_mod);
- if_inst->insert_before(if_block, cmp_inst);
+ bld.at(if_block, if_inst)
+ .CMP(bld.null_reg_d(), if_inst->src[0], if_inst->src[1],
+ if_inst->conditional_mod);
jump_inst->predicate = BRW_PREDICATE_NORMAL;
} else {
jump_inst->predicate = if_inst->predicate;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 582d0993f1c..364fc4a5ad2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -30,6 +30,8 @@
#include "glsl/glsl_types.h"
#include "glsl/ir_optimization.h"
+using namespace brw;
+
static void
assign_reg(unsigned *reg_hw_locations, fs_reg *reg)
{
@@ -468,14 +470,14 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
* see if we can actually use MRFs to do spills without overwriting normal MRF
* contents.
*/
-void
-fs_visitor::get_used_mrfs(bool *mrf_used)
+static void
+get_used_mrfs(fs_visitor *v, bool *mrf_used)
{
- int reg_width = dispatch_width / 8;
+ int reg_width = v->dispatch_width / 8;
memset(mrf_used, 0, BRW_MAX_MRF * sizeof(bool));
- foreach_block_and_inst(block, fs_inst, inst, cfg) {
+ foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
if (inst->dst.file == MRF) {
int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
mrf_used[reg] = true;
@@ -489,7 +491,7 @@ fs_visitor::get_used_mrfs(bool *mrf_used)
}
if (inst->mlen > 0) {
- for (int i = 0; i < implied_mrf_writes(inst); i++) {
+ for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
mrf_used[inst->base_mrf + i] = true;
}
}
@@ -500,12 +502,14 @@ fs_visitor::get_used_mrfs(bool *mrf_used)
* Sets interference between virtual GRFs and usage of the high GRFs for SEND
* messages (treated as MRFs in code generation).
*/
-void
-fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node)
+static void
+setup_mrf_hack_interference(fs_visitor *v, struct ra_graph *g,
+ int first_mrf_node, int *first_used_mrf)
{
bool mrf_used[BRW_MAX_MRF];
- get_used_mrfs(mrf_used);
+ get_used_mrfs(v, mrf_used);
+ *first_used_mrf = BRW_MAX_MRF;
for (int i = 0; i < BRW_MAX_MRF; i++) {
/* Mark each MRF reg node as being allocated to its physical register.
*
@@ -518,7 +522,10 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node)
* that are used as conflicting with all virtual GRFs.
*/
if (mrf_used[i]) {
- for (unsigned j = 0; j < this->alloc.count; j++) {
+ if (i < *first_used_mrf)
+ *first_used_mrf = i;
+
+ for (unsigned j = 0; j < v->alloc.count; j++) {
ra_add_node_interference(g, first_mrf_node + i, j);
}
}
@@ -528,7 +535,6 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node)
bool
fs_visitor::assign_regs(bool allow_spilling)
{
- struct brw_compiler *compiler = brw->intelScreen->compiler;
/* Most of this allocation was written for a reg_width of 1
* (dispatch_width == 8). In extending to SIMD16, the code was
* left in place and it was converted to have the hardware
@@ -584,7 +590,9 @@ fs_visitor::assign_regs(bool allow_spilling)
setup_payload_interference(g, payload_node_count, first_payload_node);
if (devinfo->gen >= 7) {
- setup_mrf_hack_interference(g, first_mrf_hack_node);
+ int first_used_mrf = BRW_MAX_MRF;
+ setup_mrf_hack_interference(this, g, first_mrf_hack_node,
+ &first_used_mrf);
foreach_block_and_inst(block, fs_inst, inst, cfg) {
/* When we do send-from-GRF for FB writes, we need to ensure that
@@ -600,6 +608,13 @@ fs_visitor::assign_regs(bool allow_spilling)
if (inst->eot) {
int size = alloc.sizes[inst->src[0].reg];
int reg = compiler->fs_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
+
+ /* If something happened to spill, we want to push the EOT send
+ * register early enough in the register file that we don't
+ * conflict with any used MRF hack registers.
+ */
+ reg -= BRW_MAX_MRF - first_used_mrf;
+
ra_set_node_reg(g, inst->src[0].reg, reg);
break;
}
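
The adjustment added above keeps two fixed ranges apart: the EOT send source, which is pinned near the top of its register class, and the high GRFs that stand in for MRFs once something spills. A toy model of the arithmetic (all names and numbers illustrative):

static int eot_reg_model(int top_of_class, int brw_max_mrf, int first_used_mrf)
{
   /* mirrors: reg -= BRW_MAX_MRF - first_used_mrf
    * When no MRF is used, first_used_mrf == BRW_MAX_MRF and nothing moves. */
   return top_of_class - (brw_max_mrf - first_used_mrf);
}
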
@@ -696,25 +711,24 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst,
dst.width = 16;
}
+ const fs_builder ibld = bld.annotate(inst->annotation, inst->ir)
+ .group(reg_size * 8, 0)
+ .at(block, inst);
+
for (int i = 0; i < count / reg_size; i++) {
/* The gen7 descriptor-based offset is 12 bits of HWORD units. */
bool gen7_read = devinfo->gen >= 7 && spill_offset < (1 << 12) * REG_SIZE;
-
- fs_inst *unspill_inst =
- new(mem_ctx) fs_inst(gen7_read ?
- SHADER_OPCODE_GEN7_SCRATCH_READ :
- SHADER_OPCODE_GEN4_SCRATCH_READ,
- dst);
+ fs_inst *unspill_inst = ibld.emit(gen7_read ?
+ SHADER_OPCODE_GEN7_SCRATCH_READ :
+ SHADER_OPCODE_GEN4_SCRATCH_READ,
+ dst);
unspill_inst->offset = spill_offset;
- unspill_inst->ir = inst->ir;
- unspill_inst->annotation = inst->annotation;
unspill_inst->regs_written = reg_size;
if (!gen7_read) {
unspill_inst->base_mrf = 14;
unspill_inst->mlen = 1; /* header contains offset */
}
- inst->insert_before(block, unspill_inst);
dst.reg_offset += reg_size;
spill_offset += reg_size * REG_SIZE;
@@ -732,17 +746,17 @@ fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src,
reg_size = 2;
}
+ const fs_builder ibld = bld.annotate(inst->annotation, inst->ir)
+ .group(reg_size * 8, 0)
+ .at(block, inst->next);
+
for (int i = 0; i < count / reg_size; i++) {
fs_inst *spill_inst =
- new(mem_ctx) fs_inst(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
- reg_size * 8, reg_null_f, src);
+ ibld.emit(SHADER_OPCODE_GEN4_SCRATCH_WRITE, bld.null_reg_f(), src);
src.reg_offset += reg_size;
spill_inst->offset = spill_offset + i * reg_size * REG_SIZE;
- spill_inst->ir = inst->ir;
- spill_inst->annotation = inst->annotation;
spill_inst->mlen = 1 + reg_size; /* header, value */
spill_inst->base_mrf = spill_base_mrf;
- inst->insert_after(block, spill_inst);
}
}
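
Both spill paths above step through scratch space one or two registers at a time and pick the message type from the offset; a sketch of that selection, assuming the usual 32-byte register size:

static bool use_gen7_scratch_read_model(int gen, unsigned spill_offset_bytes)
{
   const unsigned reg_size_bytes = 32;   /* REG_SIZE, assumed here          */
   /* The gen7 descriptor-based scratch read encodes the offset in 12 bits
    * of HWORD (register-sized) units; anything larger falls back to the
    * gen4-style message with a header and base_mrf.
    */
   return gen >= 7 && spill_offset_bytes < (1u << 12) * reg_size_bytes;
}
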
@@ -839,7 +853,7 @@ fs_visitor::spill_reg(int spill_reg)
*/
if (!spilled_any_registers) {
bool mrf_used[BRW_MAX_MRF];
- get_used_mrfs(mrf_used);
+ get_used_mrfs(this, mrf_used);
for (int i = spill_base_mrf; i < BRW_MAX_MRF; i++) {
if (mrf_used[i]) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
index 52aa5590c2e..8660ec08b8f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
@@ -37,6 +37,8 @@
*/
#define MAX_MOVS 8 /**< The maximum number of MOVs to attempt to match. */
+using namespace brw;
+
/**
* Scans forwards from an IF counting consecutive MOV instructions in the
* "then" and "else" blocks of the if statement.
@@ -153,9 +155,6 @@ fs_visitor::opt_peephole_sel()
if (movs == 0)
continue;
- fs_inst *sel_inst[MAX_MOVS] = { NULL };
- fs_inst *mov_imm_inst[MAX_MOVS] = { NULL };
-
enum brw_predicate predicate;
bool predicate_inverse;
if (devinfo->gen == 6 && if_inst->conditional_mod) {
@@ -188,9 +187,21 @@ fs_visitor::opt_peephole_sel()
movs = i;
break;
}
+ }
+
+ if (movs == 0)
+ continue;
+
+ const fs_builder ibld = bld.at(block, if_inst);
+ /* Emit a CMP if our IF used the embedded comparison */
+ if (devinfo->gen == 6 && if_inst->conditional_mod)
+ ibld.CMP(ibld.null_reg_d(), if_inst->src[0], if_inst->src[1],
+ if_inst->conditional_mod);
+
+ for (int i = 0; i < movs; i++) {
if (then_mov[i]->src[0].equals(else_mov[i]->src[0])) {
- sel_inst[i] = MOV(then_mov[i]->dst, then_mov[i]->src[0]);
+ ibld.MOV(then_mov[i]->dst, then_mov[i]->src[0]);
} else {
/* Only the last source register can be a constant, so if the MOV
* in the "then" clause uses a constant, we need to put it in a
@@ -200,29 +211,13 @@ fs_visitor::opt_peephole_sel()
if (src0.file == IMM) {
src0 = vgrf(glsl_type::float_type);
src0.type = then_mov[i]->src[0].type;
- mov_imm_inst[i] = MOV(src0, then_mov[i]->src[0]);
+ ibld.MOV(src0, then_mov[i]->src[0]);
}
- sel_inst[i] = SEL(then_mov[i]->dst, src0, else_mov[i]->src[0]);
- sel_inst[i]->predicate = predicate;
- sel_inst[i]->predicate_inverse = predicate_inverse;
+ set_predicate_inv(predicate, predicate_inverse,
+ ibld.SEL(then_mov[i]->dst, src0,
+ else_mov[i]->src[0]));
}
- }
-
- if (movs == 0)
- continue;
-
- /* Emit a CMP if our IF used the embedded comparison */
- if (devinfo->gen == 6 && if_inst->conditional_mod) {
- fs_inst *cmp_inst = CMP(reg_null_d, if_inst->src[0], if_inst->src[1],
- if_inst->conditional_mod);
- if_inst->insert_before(block, cmp_inst);
- }
-
- for (int i = 0; i < movs; i++) {
- if (mov_imm_inst[i])
- if_inst->insert_before(block, mov_imm_inst[i]);
- if_inst->insert_before(block, sel_inst[i]);
then_mov[i]->remove(then_block);
else_mov[i]->remove(else_block);
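
At the source level, the sel peephole reshaped above (like the predicated-break peephole before it) trades control flow for predicated instructions; a scalar C++ picture of the transform, with the gen6 quirk that the flag must first be set by an explicit CMP:

static float sel_model(bool flag, float then_src, float else_src)
{
   return flag ? then_src : else_src;   /* (+f0) SEL                        */
}

/* before:  if (p) { dst = a; } else { dst = b; }  endif
 * after:   CMP sets the flag (only needed on gen6 when the IF carried an
 *          embedded comparison), then:  dst = sel_model(p, a, b);
 * If a is an immediate it is first copied to a temporary, since only the
 * last SEL source may be a constant.
 */
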
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index e1f47d4ec44..9a4bad6bcf5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -47,6 +47,7 @@
#include "glsl/ir_optimization.h"
#include "program/sampler.h"
+using namespace brw;
fs_reg *
fs_visitor::emit_vs_system_value(int location)
@@ -76,1371 +77,6 @@ fs_visitor::emit_vs_system_value(int location)
return reg;
}
-void
-fs_visitor::visit(ir_variable *ir)
-{
- fs_reg *reg = NULL;
-
- if (variable_storage(ir))
- return;
-
- if (ir->data.mode == ir_var_shader_in) {
- assert(ir->data.location != -1);
- if (stage == MESA_SHADER_VERTEX) {
- reg = new(this->mem_ctx)
- fs_reg(ATTR, ir->data.location,
- brw_type_for_base_type(ir->type->get_scalar_type()));
- } else if (ir->data.location == VARYING_SLOT_POS) {
- reg = emit_fragcoord_interpolation(ir->data.pixel_center_integer,
- ir->data.origin_upper_left);
- } else if (ir->data.location == VARYING_SLOT_FACE) {
- reg = emit_frontfacing_interpolation();
- } else {
- reg = new(this->mem_ctx) fs_reg(vgrf(ir->type));
- emit_general_interpolation(*reg, ir->name, ir->type,
- (glsl_interp_qualifier) ir->data.interpolation,
- ir->data.location, ir->data.centroid,
- ir->data.sample);
- }
- assert(reg);
- hash_table_insert(this->variable_ht, reg, ir);
- return;
- } else if (ir->data.mode == ir_var_shader_out) {
- reg = new(this->mem_ctx) fs_reg(vgrf(ir->type));
-
- if (stage == MESA_SHADER_VERTEX) {
- int vector_elements =
- ir->type->is_array() ? ir->type->fields.array->vector_elements
- : ir->type->vector_elements;
-
- for (int i = 0; i < (type_size(ir->type) + 3) / 4; i++) {
- int output = ir->data.location + i;
- this->outputs[output] = *reg;
- this->outputs[output].reg_offset = i * 4;
- this->output_components[output] = vector_elements;
- }
-
- } else if (ir->data.index > 0) {
- assert(ir->data.location == FRAG_RESULT_DATA0);
- assert(ir->data.index == 1);
- this->dual_src_output = *reg;
- this->do_dual_src = true;
- } else if (ir->data.location == FRAG_RESULT_COLOR) {
- /* Writing gl_FragColor outputs to all color regions. */
- assert(stage == MESA_SHADER_FRAGMENT);
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
- this->outputs[i] = *reg;
- this->output_components[i] = 4;
- }
- } else if (ir->data.location == FRAG_RESULT_DEPTH) {
- this->frag_depth = *reg;
- } else if (ir->data.location == FRAG_RESULT_SAMPLE_MASK) {
- this->sample_mask = *reg;
- } else {
- /* gl_FragData or a user-defined FS output */
- assert(ir->data.location >= FRAG_RESULT_DATA0 &&
- ir->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);
-
- int vector_elements =
- ir->type->is_array() ? ir->type->fields.array->vector_elements
- : ir->type->vector_elements;
-
- /* General color output. */
- for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) {
- int output = ir->data.location - FRAG_RESULT_DATA0 + i;
- this->outputs[output] = offset(*reg, vector_elements * i);
- this->output_components[output] = vector_elements;
- }
- }
- } else if (ir->data.mode == ir_var_uniform) {
- int param_index = uniforms;
-
- /* Thanks to the lower_ubo_reference pass, we will see only
- * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
- * variables, so no need for them to be in variable_ht.
- *
- * Some uniforms, such as samplers and atomic counters, have no actual
- * storage, so we should ignore them.
- */
- if (ir->is_in_uniform_block() || type_size(ir->type) == 0)
- return;
-
- if (dispatch_width == 16) {
- if (!variable_storage(ir)) {
- fail("Failed to find uniform '%s' in SIMD16\n", ir->name);
- }
- return;
- }
-
- param_size[param_index] = type_size(ir->type);
- if (!strncmp(ir->name, "gl_", 3)) {
- setup_builtin_uniform_values(ir);
- } else {
- setup_uniform_values(ir);
- }
-
- reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
- reg->type = brw_type_for_base_type(ir->type);
-
- } else if (ir->data.mode == ir_var_system_value) {
- switch (ir->data.location) {
- case SYSTEM_VALUE_BASE_VERTEX:
- case SYSTEM_VALUE_VERTEX_ID:
- case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
- case SYSTEM_VALUE_INSTANCE_ID:
- reg = emit_vs_system_value(ir->data.location);
- break;
- case SYSTEM_VALUE_SAMPLE_POS:
- reg = emit_samplepos_setup();
- break;
- case SYSTEM_VALUE_SAMPLE_ID:
- reg = emit_sampleid_setup();
- break;
- case SYSTEM_VALUE_SAMPLE_MASK_IN:
- assert(devinfo->gen >= 7);
- reg = new(mem_ctx)
- fs_reg(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0),
- BRW_REGISTER_TYPE_D));
- break;
- }
- }
-
- if (!reg)
- reg = new(this->mem_ctx) fs_reg(vgrf(ir->type));
-
- hash_table_insert(this->variable_ht, reg, ir);
-}
-
-void
-fs_visitor::visit(ir_dereference_variable *ir)
-{
- fs_reg *reg = variable_storage(ir->var);
-
- if (!reg) {
- fail("Failed to find variable storage for %s\n", ir->var->name);
- this->result = fs_reg(reg_null_d);
- return;
- }
- this->result = *reg;
-}
-
-void
-fs_visitor::visit(ir_dereference_record *ir)
-{
- const glsl_type *struct_type = ir->record->type;
-
- ir->record->accept(this);
-
- unsigned int off = 0;
- for (unsigned int i = 0; i < struct_type->length; i++) {
- if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
- break;
- off += type_size(struct_type->fields.structure[i].type);
- }
- this->result = offset(this->result, off);
- this->result.type = brw_type_for_base_type(ir->type);
-}
-
-void
-fs_visitor::visit(ir_dereference_array *ir)
-{
- ir_constant *constant_index;
- fs_reg src;
- int element_size = type_size(ir->type);
-
- constant_index = ir->array_index->as_constant();
-
- ir->array->accept(this);
- src = this->result;
- src.type = brw_type_for_base_type(ir->type);
-
- if (constant_index) {
- if (src.file == ATTR) {
- /* Attribute arrays get loaded as one vec4 per element. In that case
- * offset the source register.
- */
- src.reg += constant_index->value.i[0];
- } else {
- assert(src.file == UNIFORM || src.file == GRF || src.file == HW_REG);
- src = offset(src, constant_index->value.i[0] * element_size);
- }
- } else {
- /* Variable index array dereference. We attach the variable index
- * component to the reg as a pointer to a register containing the
- * offset. Currently only uniform arrays are supported in this patch,
- * and that reladdr pointer is resolved by
- * move_uniform_array_access_to_pull_constants(). All other array types
- * are lowered by lower_variable_index_to_cond_assign().
- */
- ir->array_index->accept(this);
-
- fs_reg index_reg;
- index_reg = vgrf(glsl_type::int_type);
- emit(BRW_OPCODE_MUL, index_reg, this->result, fs_reg(element_size));
-
- if (src.reladdr) {
- emit(BRW_OPCODE_ADD, index_reg, *src.reladdr, index_reg);
- }
-
- src.reladdr = ralloc(mem_ctx, fs_reg);
- memcpy(src.reladdr, &index_reg, sizeof(index_reg));
- }
- this->result = src;
-}
-
-fs_inst *
-fs_visitor::emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y,
- const fs_reg &a)
-{
- if (devinfo->gen < 6) {
- /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */
- fs_reg y_times_a = vgrf(glsl_type::float_type);
- fs_reg one_minus_a = vgrf(glsl_type::float_type);
- fs_reg x_times_one_minus_a = vgrf(glsl_type::float_type);
-
- emit(MUL(y_times_a, y, a));
-
- fs_reg negative_a = a;
- negative_a.negate = !a.negate;
- emit(ADD(one_minus_a, negative_a, fs_reg(1.0f)));
- emit(MUL(x_times_one_minus_a, x, one_minus_a));
-
- return emit(ADD(dst, x_times_one_minus_a, y_times_a));
- } else {
- /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
- * we need to reorder the operands.
- */
- return emit(LRP(dst, a, y, x));
- }
-}
-
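
The removed emit_lrp helper above documents both blend formulations; as a quick reference, the pre-gen6 expansion and the LRP instruction's own operand order agree as follows (plain C++, names invented):

static float lrp_expanded(float x, float y, float a)
{
   return x * (1.0f - a) + y * a;             /* pre-gen6: MUL/ADD/MUL/ADD  */
}

static float lrp_instruction(float src0, float src1, float src2)
{
   return src1 * src0 + src2 * (1.0f - src0); /* what LRP computes          */
}

/* lrp_instruction(a, y, x) == lrp_expanded(x, y, a), hence LRP(dst, a, y, x). */
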
-void
-fs_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst,
- const fs_reg &src0, const fs_reg &src1)
-{
- assert(conditionalmod == BRW_CONDITIONAL_GE ||
- conditionalmod == BRW_CONDITIONAL_L);
-
- fs_inst *inst;
-
- if (devinfo->gen >= 6) {
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->conditional_mod = conditionalmod;
- } else {
- emit(CMP(reg_null_d, src0, src1, conditionalmod));
-
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
-}
-
-void
-fs_visitor::emit_uniformize(const fs_reg &dst, const fs_reg &src)
-{
- const fs_reg chan_index = vgrf(glsl_type::uint_type);
-
- emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, component(chan_index, 0))
- ->force_writemask_all = true;
- emit(SHADER_OPCODE_BROADCAST, component(dst, 0),
- src, component(chan_index, 0))
- ->force_writemask_all = true;
-}
-
-bool
-fs_visitor::try_emit_saturate(ir_expression *ir)
-{
- if (ir->operation != ir_unop_saturate)
- return false;
-
- ir_rvalue *sat_val = ir->operands[0];
-
- fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail();
-
- sat_val->accept(this);
- fs_reg src = this->result;
-
- fs_inst *last_inst = (fs_inst *) this->instructions.get_tail();
-
- /* If the last instruction from our accept() generated our
- * src, just set the saturate flag instead of emmitting a separate mov.
- */
- fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
- if (modify && modify->regs_written == modify->dst.width / 8 &&
- modify->can_do_saturate()) {
- modify->saturate = true;
- this->result = src;
- return true;
- }
-
- return false;
-}
-
-bool
-fs_visitor::try_emit_line(ir_expression *ir)
-{
- /* LINE's src0 must be of type float. */
- if (ir->type != glsl_type::float_type)
- return false;
-
- ir_rvalue *nonmul = ir->operands[1];
- ir_expression *mul = ir->operands[0]->as_expression();
-
- if (!mul || mul->operation != ir_binop_mul) {
- nonmul = ir->operands[0];
- mul = ir->operands[1]->as_expression();
-
- if (!mul || mul->operation != ir_binop_mul)
- return false;
- }
-
- ir_constant *const_add = nonmul->as_constant();
- if (!const_add)
- return false;
-
- int add_operand_vf = brw_float_to_vf(const_add->value.f[0]);
- if (add_operand_vf == -1)
- return false;
-
- ir_rvalue *non_const_mul = mul->operands[1];
- ir_constant *const_mul = mul->operands[0]->as_constant();
- if (!const_mul) {
- const_mul = mul->operands[1]->as_constant();
-
- if (!const_mul)
- return false;
-
- non_const_mul = mul->operands[0];
- }
-
- int mul_operand_vf = brw_float_to_vf(const_mul->value.f[0]);
- if (mul_operand_vf == -1)
- return false;
-
- non_const_mul->accept(this);
- fs_reg src1 = this->result;
-
- fs_reg src0 = vgrf(ir->type);
- emit(BRW_OPCODE_MOV, src0,
- fs_reg((uint8_t)mul_operand_vf, 0, 0, (uint8_t)add_operand_vf));
-
- this->result = vgrf(ir->type);
- emit(BRW_OPCODE_LINE, this->result, src0, src1);
- return true;
-}
-
-bool
-fs_visitor::try_emit_mad(ir_expression *ir)
-{
- /* 3-src instructions were introduced in gen6. */
- if (devinfo->gen < 6)
- return false;
-
- /* MAD can only handle floating-point data. */
- if (ir->type != glsl_type::float_type)
- return false;
-
- ir_rvalue *nonmul;
- ir_expression *mul;
- bool mul_negate, mul_abs;
-
- for (int i = 0; i < 2; i++) {
- mul_negate = false;
- mul_abs = false;
-
- mul = ir->operands[i]->as_expression();
- nonmul = ir->operands[1 - i];
-
- if (mul && mul->operation == ir_unop_abs) {
- mul = mul->operands[0]->as_expression();
- mul_abs = true;
- } else if (mul && mul->operation == ir_unop_neg) {
- mul = mul->operands[0]->as_expression();
- mul_negate = true;
- }
-
- if (mul && mul->operation == ir_binop_mul)
- break;
- }
-
- if (!mul || mul->operation != ir_binop_mul)
- return false;
-
- nonmul->accept(this);
- fs_reg src0 = this->result;
-
- mul->operands[0]->accept(this);
- fs_reg src1 = this->result;
- src1.negate ^= mul_negate;
- src1.abs = mul_abs;
- if (mul_abs)
- src1.negate = false;
-
- mul->operands[1]->accept(this);
- fs_reg src2 = this->result;
- src2.abs = mul_abs;
- if (mul_abs)
- src2.negate = false;
-
- this->result = vgrf(ir->type);
- emit(BRW_OPCODE_MAD, this->result, src0, src1, src2);
-
- return true;
-}
-
-bool
-fs_visitor::try_emit_b2f_of_comparison(ir_expression *ir)
-{
- /* On platforms that do not natively generate 0u and ~0u for Boolean
- * results, b2f expressions that look like
- *
- * f = b2f(expr cmp 0)
- *
- * will generate better code by pretending the expression is
- *
- * f = ir_triop_csel(0.0, 1.0, expr cmp 0)
- *
- * This is because the last instruction of "expr" can generate the
- * condition code for the "cmp 0". This avoids having to do the "-(b & 1)"
- * trick to generate 0u or ~0u for the Boolean result. This means code like
- *
- * mov(16) g16<1>F 1F
- * mul.ge.f0(16) null g6<8,8,1>F g14<8,8,1>F
- * (+f0) sel(16) m6<1>F g16<8,8,1>F 0F
- *
- * will be generated instead of
- *
- * mul(16) g2<1>F g12<8,8,1>F g4<8,8,1>F
- * cmp.ge.f0(16) g2<1>D g4<8,8,1>F 0F
- * and(16) g4<1>D g2<8,8,1>D 1D
- * and(16) m6<1>D -g4<8,8,1>D 0x3f800000UD
- *
- * When the comparison is != 0.0 using the knowledge that the false case
- * already results in zero would allow better code generation by possibly
- * avoiding a load-immediate instruction.
- */
- ir_expression *cmp = ir->operands[0]->as_expression();
- if (cmp == NULL)
- return false;
-
- if (cmp->operation == ir_binop_nequal) {
- for (unsigned i = 0; i < 2; i++) {
- ir_constant *c = cmp->operands[i]->as_constant();
- if (c == NULL || !c->is_zero())
- continue;
-
- ir_expression *expr = cmp->operands[i ^ 1]->as_expression();
- if (expr != NULL) {
- fs_reg op[2];
-
- for (unsigned j = 0; j < 2; j++) {
- cmp->operands[j]->accept(this);
- op[j] = this->result;
-
- resolve_ud_negate(&op[j]);
- }
-
- emit_bool_to_cond_code_of_reg(cmp, op);
-
- /* In this case we know when the condition is true, op[i ^ 1]
- * contains zero. Invert the predicate, use op[i ^ 1] as src0,
- * and immediate 1.0f as src1.
- */
- this->result = vgrf(ir->type);
- op[i ^ 1].type = BRW_REGISTER_TYPE_F;
-
- fs_inst *inst = emit(SEL(this->result, op[i ^ 1], fs_reg(1.0f)));
- inst->predicate = BRW_PREDICATE_NORMAL;
- inst->predicate_inverse = true;
- return true;
- }
- }
- }
-
- emit_bool_to_cond_code(cmp);
-
- fs_reg temp = vgrf(ir->type);
- emit(MOV(temp, fs_reg(1.0f)));
-
- this->result = vgrf(ir->type);
- fs_inst *inst = emit(SEL(this->result, temp, fs_reg(0.0f)));
- inst->predicate = BRW_PREDICATE_NORMAL;
-
- return true;
-}
-
-static int
-pack_pixel_offset(float x)
-{
- /* Clamp upper end of the range to +7/16. See explanation in non-constant
- * offset case below. */
- int n = MIN2((int)(x * 16), 7);
- return n & 0xf;
-}
-
-void
-fs_visitor::emit_interpolate_expression(ir_expression *ir)
-{
- /* in SIMD16 mode, the pixel interpolator returns coords interleaved
- * 8 channels at a time, same as the barycentric coords presented in
- * the FS payload. this requires a bit of extra work to support.
- */
- no16("interpolate_at_* not yet supported in SIMD16 mode.");
-
- assert(stage == MESA_SHADER_FRAGMENT);
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-
- ir_dereference * deref = ir->operands[0]->as_dereference();
- ir_swizzle * swiz = NULL;
- if (!deref) {
- /* the api does not allow a swizzle here, but the varying packing code
- * may have pushed one into here.
- */
- swiz = ir->operands[0]->as_swizzle();
- assert(swiz);
- deref = swiz->val->as_dereference();
- }
- assert(deref);
- ir_variable * var = deref->variable_referenced();
- assert(var);
-
- /* 1. collect interpolation factors */
-
- fs_reg dst_xy = vgrf(glsl_type::get_instance(ir->type->base_type, 2, 1));
-
- /* for most messages, we need one reg of ignored data; the hardware requires mlen==1
- * even when there is no payload. in the per-slot offset case, we'll replace this with
- * the proper source data. */
- fs_reg src = vgrf(glsl_type::float_type);
- int mlen = 1; /* one reg unless overriden */
- int reg_width = dispatch_width / 8;
- fs_inst *inst;
-
- switch (ir->operation) {
- case ir_unop_interpolate_at_centroid:
- inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u));
- break;
-
- case ir_binop_interpolate_at_sample: {
- ir_constant *sample_num = ir->operands[1]->as_constant();
- assert(sample_num || !"nonconstant sample number should have been lowered.");
-
- unsigned msg_data = sample_num->value.i[0] << 4;
- inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, fs_reg(msg_data));
- break;
- }
-
- case ir_binop_interpolate_at_offset: {
- ir_constant *const_offset = ir->operands[1]->as_constant();
- if (const_offset) {
- unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) |
- (pack_pixel_offset(const_offset->value.f[1]) << 4);
- inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
- fs_reg(msg_data));
- } else {
- /* pack the operands: hw wants offsets as 4 bit signed ints */
- ir->operands[1]->accept(this);
- src = vgrf(glsl_type::ivec2_type);
- fs_reg src2 = src;
- for (int i = 0; i < 2; i++) {
- fs_reg temp = vgrf(glsl_type::float_type);
- emit(MUL(temp, this->result, fs_reg(16.0f)));
- emit(MOV(src2, temp)); /* float to int */
-
- /* Clamp the upper end of the range to +7/16. ARB_gpu_shader5 requires
- * that we support a maximum offset of +0.5, which isn't representable
- * in a S0.4 value -- if we didn't clamp it, we'd end up with -8/16,
- * which is the opposite of what the shader author wanted.
- *
- * This is legal due to ARB_gpu_shader5's quantization rules:
- *
- * "Not all values of <offset> may be supported; x and y offsets may
- * be rounded to fixed-point values with the number of fraction bits
- * given by the implementation-dependent constant
- * FRAGMENT_INTERPOLATION_OFFSET_BITS"
- */
-
- fs_inst *inst = emit(BRW_OPCODE_SEL, src2, src2, fs_reg(7));
- inst->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */
-
- src2 = offset(src2, 1);
- this->result = offset(this->result, 1);
- }
-
- mlen = 2 * reg_width;
- inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
- fs_reg(0u));
- }
- break;
- }
-
- default:
- unreachable("not reached");
- }
-
- inst->mlen = mlen;
- inst->regs_written = 2 * reg_width; /* 2 floats per slot returned */
- inst->pi_noperspective = var->determine_interpolation_mode(key->flat_shade) ==
- INTERP_QUALIFIER_NOPERSPECTIVE;
-
- /* 2. emit linterp */
-
- fs_reg res = vgrf(ir->type);
- this->result = res;
-
- for (int i = 0; i < ir->type->vector_elements; i++) {
- int ch = swiz ? ((*(int *)&swiz->mask) >> 2*i) & 3 : i;
- emit(FS_OPCODE_LINTERP, res, dst_xy,
- fs_reg(interp_reg(var->data.location, ch)));
- res = offset(res, 1);
- }
-}
-
-void
-fs_visitor::visit(ir_expression *ir)
-{
- unsigned int operand;
- fs_reg op[3], temp;
- fs_inst *inst;
- struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
-
- assert(ir->get_num_operands() <= 3);
-
- if (try_emit_saturate(ir))
- return;
-
- /* Deal with the real oddball stuff first */
- switch (ir->operation) {
- case ir_binop_add:
- if (devinfo->gen <= 5 && try_emit_line(ir))
- return;
- if (try_emit_mad(ir))
- return;
- break;
-
- case ir_triop_csel:
- ir->operands[1]->accept(this);
- op[1] = this->result;
- ir->operands[2]->accept(this);
- op[2] = this->result;
-
- emit_bool_to_cond_code(ir->operands[0]);
-
- this->result = vgrf(ir->type);
- inst = emit(SEL(this->result, op[1], op[2]));
- inst->predicate = BRW_PREDICATE_NORMAL;
- return;
-
- case ir_unop_b2f:
- if (devinfo->gen <= 5 && try_emit_b2f_of_comparison(ir))
- return;
- break;
-
- case ir_unop_interpolate_at_centroid:
- case ir_binop_interpolate_at_offset:
- case ir_binop_interpolate_at_sample:
- emit_interpolate_expression(ir);
- return;
-
- default:
- break;
- }
-
- for (operand = 0; operand < ir->get_num_operands(); operand++) {
- ir->operands[operand]->accept(this);
- if (this->result.file == BAD_FILE) {
- fail("Failed to get tree for expression operand:\n");
- ir->operands[operand]->fprint(stderr);
- fprintf(stderr, "\n");
- }
- assert(this->result.file == GRF ||
- this->result.file == UNIFORM || this->result.file == ATTR);
- op[operand] = this->result;
-
- /* Matrix expression operands should have been broken down to vector
- * operations already.
- */
- assert(!ir->operands[operand]->type->is_matrix());
- /* And then those vector operands should have been broken down to scalar.
- */
- assert(!ir->operands[operand]->type->is_vector());
- }
-
- /* Storage for our result. If our result goes into an assignment, it will
- * just get copy-propagated out, so no worries.
- */
- this->result = vgrf(ir->type);
-
- switch (ir->operation) {
- case ir_unop_logic_not:
- emit(NOT(this->result, op[0]));
- break;
- case ir_unop_neg:
- op[0].negate = !op[0].negate;
- emit(MOV(this->result, op[0]));
- break;
- case ir_unop_abs:
- op[0].abs = true;
- op[0].negate = false;
- emit(MOV(this->result, op[0]));
- break;
- case ir_unop_sign:
- if (ir->type->is_float()) {
- /* AND(val, 0x80000000) gives the sign bit.
- *
- * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
- * zero.
- */
- emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
-
- op[0].type = BRW_REGISTER_TYPE_UD;
- this->result.type = BRW_REGISTER_TYPE_UD;
- emit(AND(this->result, op[0], fs_reg(0x80000000u)));
-
- inst = emit(OR(this->result, this->result, fs_reg(0x3f800000u)));
- inst->predicate = BRW_PREDICATE_NORMAL;
-
- this->result.type = BRW_REGISTER_TYPE_F;
- } else {
- /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
- * -> non-negative val generates 0x00000000.
- * Predicated OR sets 1 if val is positive.
- */
- emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G));
-
- emit(ASR(this->result, op[0], fs_reg(31)));
-
- inst = emit(OR(this->result, this->result, fs_reg(1)));
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
- break;
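/* A minimal standalone sketch (not driver code) of the bit trick emitted for
 * the floating-point sign() case above, assuming IEEE-754 binary32 floats:
 * AND keeps only the sign bit, OR merges in the bit pattern of 1.0f, and the
 * zero test mirrors the predicated OR.  sign_via_bits() is a hypothetical
 * helper name.
 */
#include <cstdint>
#include <cstring>

static float sign_via_bits(float x)
{
   if (x == 0.0f)
      return 0.0f;                       /* the CMP ... NZ leaves zero alone */

   uint32_t bits;
   std::memcpy(&bits, &x, sizeof bits);
   bits = (bits & 0x80000000u) | 0x3f800000u;   /* AND then OR, as above */

   float r;
   std::memcpy(&r, &bits, sizeof r);
   return r;                             /* +1.0f or -1.0f */
}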
- case ir_unop_rcp:
- emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
- break;
-
- case ir_unop_exp2:
- emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
- break;
- case ir_unop_log2:
- emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
- break;
- case ir_unop_exp:
- case ir_unop_log:
- unreachable("not reached: should be handled by ir_explog_to_explog2");
- case ir_unop_sin:
- emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
- break;
- case ir_unop_cos:
- emit_math(SHADER_OPCODE_COS, this->result, op[0]);
- break;
-
- case ir_unop_dFdx:
- /* Select one of the two opcodes based on the glHint value. */
- if (fs_key->high_quality_derivatives)
- emit(FS_OPCODE_DDX_FINE, this->result, op[0]);
- else
- emit(FS_OPCODE_DDX_COARSE, this->result, op[0]);
- break;
-
- case ir_unop_dFdx_coarse:
- emit(FS_OPCODE_DDX_COARSE, this->result, op[0]);
- break;
-
- case ir_unop_dFdx_fine:
- emit(FS_OPCODE_DDX_FINE, this->result, op[0]);
- break;
-
- case ir_unop_dFdy:
- /* Select one of the two opcodes based on the glHint value. */
- if (fs_key->high_quality_derivatives)
- emit(FS_OPCODE_DDY_FINE, result, op[0], fs_reg(fs_key->render_to_fbo));
- else
- emit(FS_OPCODE_DDY_COARSE, result, op[0], fs_reg(fs_key->render_to_fbo));
- break;
-
- case ir_unop_dFdy_coarse:
- emit(FS_OPCODE_DDY_COARSE, result, op[0], fs_reg(fs_key->render_to_fbo));
- break;
-
- case ir_unop_dFdy_fine:
- emit(FS_OPCODE_DDY_FINE, result, op[0], fs_reg(fs_key->render_to_fbo));
- break;
-
- case ir_binop_add:
- emit(ADD(this->result, op[0], op[1]));
- break;
- case ir_binop_sub:
- unreachable("not reached: should be handled by ir_sub_to_add_neg");
-
- case ir_binop_mul:
- if (devinfo->gen < 8 && ir->type->is_integer()) {
- /* For integer multiplication, the MUL uses the low 16 bits
- * of one of the operands (src0 on gen6, src1 on gen7). The
-          * MACH then accumulates the contribution of the upper 16 bits
-          * of that operand.
- */
- if (ir->operands[0]->is_uint16_constant()) {
- if (devinfo->gen < 7)
- emit(MUL(this->result, op[0], op[1]));
- else
- emit(MUL(this->result, op[1], op[0]));
- } else if (ir->operands[1]->is_uint16_constant()) {
- if (devinfo->gen < 7)
- emit(MUL(this->result, op[1], op[0]));
- else
- emit(MUL(this->result, op[0], op[1]));
- } else {
- if (devinfo->gen >= 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
-
- struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
- this->result.type);
-
- emit(MUL(acc, op[0], op[1]));
- emit(MACH(reg_null_d, op[0], op[1]));
- emit(MOV(this->result, fs_reg(acc)));
- }
- } else {
- emit(MUL(this->result, op[0], op[1]));
- }
- break;
- case ir_binop_imul_high: {
- if (devinfo->gen >= 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
-
- struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
- this->result.type);
-
- fs_inst *mul = emit(MUL(acc, op[0], op[1]));
- emit(MACH(this->result, op[0], op[1]));
-
-      /* Until Gen8, integer multiplies read 32 bits from one source and
-       * 16 bits from the other, relying on the MACH instruction to
-       * generate the high bits of the result.
-       *
-       * On Gen8, the multiply instruction does a full 32x32-bit multiply,
-       * but in order to get the high 32 bits of the full 64-bit result we
-       * have to simulate the previous behavior and then use a MACH
-       * instruction.
- *
- * FINISHME: Don't use source modifiers on src1.
- */
- if (devinfo->gen >= 8) {
- assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
- mul->src[1].type == BRW_REGISTER_TYPE_UD);
- if (mul->src[1].type == BRW_REGISTER_TYPE_D) {
- mul->src[1].type = BRW_REGISTER_TYPE_W;
- mul->src[1].stride = 2;
- } else {
- mul->src[1].type = BRW_REGISTER_TYPE_UW;
- mul->src[1].stride = 2;
- }
- }
-
- break;
- }
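/* Reference semantics for the MUL/MACH pair above (a sketch, not driver
 * code): imul_high returns the high 32 bits of the full 64-bit product,
 * which is exactly what MACH reads back out of the accumulator.
 */
#include <cstdint>

static uint32_t umul_high_ref(uint32_t a, uint32_t b)
{
   return (uint32_t)(((uint64_t)a * b) >> 32);
}

static int32_t imul_high_ref(int32_t a, int32_t b)
{
   return (int32_t)(((int64_t)a * b) >> 32);
}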
- case ir_binop_div:
- /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
- assert(ir->type->is_integer());
- emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
- break;
- case ir_binop_carry: {
- if (devinfo->gen >= 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
-
- struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
- BRW_REGISTER_TYPE_UD);
-
- emit(ADDC(reg_null_ud, op[0], op[1]));
- emit(MOV(this->result, fs_reg(acc)));
- break;
- }
- case ir_binop_borrow: {
- if (devinfo->gen >= 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
-
- struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
- BRW_REGISTER_TYPE_UD);
-
- emit(SUBB(reg_null_ud, op[0], op[1]));
- emit(MOV(this->result, fs_reg(acc)));
- break;
- }
- case ir_binop_mod:
- /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
- assert(ir->type->is_integer());
- emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
- break;
-
- case ir_binop_less:
- case ir_binop_greater:
- case ir_binop_lequal:
- case ir_binop_gequal:
- case ir_binop_equal:
- case ir_binop_all_equal:
- case ir_binop_nequal:
- case ir_binop_any_nequal:
- if (devinfo->gen <= 5) {
- resolve_bool_comparison(ir->operands[0], &op[0]);
- resolve_bool_comparison(ir->operands[1], &op[1]);
- }
-
- emit(CMP(this->result, op[0], op[1],
- brw_conditional_for_comparison(ir->operation)));
- break;
-
- case ir_binop_logic_xor:
- emit(XOR(this->result, op[0], op[1]));
- break;
-
- case ir_binop_logic_or:
- emit(OR(this->result, op[0], op[1]));
- break;
-
- case ir_binop_logic_and:
- emit(AND(this->result, op[0], op[1]));
- break;
-
- case ir_binop_dot:
- case ir_unop_any:
- unreachable("not reached: should be handled by brw_fs_channel_expressions");
-
- case ir_unop_noise:
- unreachable("not reached: should be handled by lower_noise");
-
- case ir_quadop_vector:
- unreachable("not reached: should be handled by lower_quadop_vector");
-
- case ir_binop_vector_extract:
- unreachable("not reached: should be handled by lower_vec_index_to_cond_assign()");
-
- case ir_triop_vector_insert:
- unreachable("not reached: should be handled by lower_vector_insert()");
-
- case ir_binop_ldexp:
- unreachable("not reached: should be handled by ldexp_to_arith()");
-
- case ir_unop_sqrt:
- emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
- break;
-
- case ir_unop_rsq:
- emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
- break;
-
- case ir_unop_bitcast_i2f:
- case ir_unop_bitcast_u2f:
- op[0].type = BRW_REGISTER_TYPE_F;
- this->result = op[0];
- break;
- case ir_unop_i2u:
- case ir_unop_bitcast_f2u:
- op[0].type = BRW_REGISTER_TYPE_UD;
- this->result = op[0];
- break;
- case ir_unop_u2i:
- case ir_unop_bitcast_f2i:
- op[0].type = BRW_REGISTER_TYPE_D;
- this->result = op[0];
- break;
- case ir_unop_i2f:
- case ir_unop_u2f:
- case ir_unop_f2i:
- case ir_unop_f2u:
- emit(MOV(this->result, op[0]));
- break;
-
- case ir_unop_b2i:
- emit(AND(this->result, op[0], fs_reg(1)));
- break;
- case ir_unop_b2f:
- if (devinfo->gen <= 5) {
- resolve_bool_comparison(ir->operands[0], &op[0]);
- }
- op[0].type = BRW_REGISTER_TYPE_D;
- this->result.type = BRW_REGISTER_TYPE_D;
- emit(AND(this->result, op[0], fs_reg(0x3f800000u)));
- this->result.type = BRW_REGISTER_TYPE_F;
- break;
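/* Sketch of the b2f AND trick above (b2f_via_and() is a hypothetical helper,
 * not driver code): a true boolean is all ones (~0), so masking it with
 * 0x3f800000, the bit pattern of 1.0f, yields exactly 1.0f; false (0) stays
 * 0.0f.  Assumes IEEE-754 binary32.
 */
#include <cstdint>
#include <cstring>

static float b2f_via_and(int32_t b)      /* b is ~0 for true, 0 for false */
{
   uint32_t bits = (uint32_t)b & 0x3f800000u;
   float f;
   std::memcpy(&f, &bits, sizeof f);
   return f;
}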
-
- case ir_unop_f2b:
- emit(CMP(this->result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
- break;
- case ir_unop_i2b:
- emit(CMP(this->result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
- break;
-
- case ir_unop_trunc:
- emit(RNDZ(this->result, op[0]));
- break;
- case ir_unop_ceil: {
- fs_reg tmp = vgrf(ir->type);
- op[0].negate = !op[0].negate;
- emit(RNDD(tmp, op[0]));
- tmp.negate = true;
- emit(MOV(this->result, tmp));
- }
- break;
- case ir_unop_floor:
- emit(RNDD(this->result, op[0]));
- break;
- case ir_unop_fract:
- emit(FRC(this->result, op[0]));
- break;
- case ir_unop_round_even:
- emit(RNDE(this->result, op[0]));
- break;
-
- case ir_binop_min:
- case ir_binop_max:
- resolve_ud_negate(&op[0]);
- resolve_ud_negate(&op[1]);
- emit_minmax(ir->operation == ir_binop_min ?
- BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
- this->result, op[0], op[1]);
- break;
- case ir_unop_pack_snorm_2x16:
- case ir_unop_pack_snorm_4x8:
- case ir_unop_pack_unorm_2x16:
- case ir_unop_pack_unorm_4x8:
- case ir_unop_unpack_snorm_2x16:
- case ir_unop_unpack_snorm_4x8:
- case ir_unop_unpack_unorm_2x16:
- case ir_unop_unpack_unorm_4x8:
- case ir_unop_unpack_half_2x16:
- case ir_unop_pack_half_2x16:
- unreachable("not reached: should be handled by lower_packing_builtins");
- case ir_unop_unpack_half_2x16_split_x:
- emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, this->result, op[0]);
- break;
- case ir_unop_unpack_half_2x16_split_y:
- emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, this->result, op[0]);
- break;
- case ir_binop_pow:
- emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
- break;
-
- case ir_unop_bitfield_reverse:
- emit(BFREV(this->result, op[0]));
- break;
- case ir_unop_bit_count:
- emit(CBIT(this->result, op[0]));
- break;
- case ir_unop_find_msb:
- temp = vgrf(glsl_type::uint_type);
- emit(FBH(temp, op[0]));
-
- /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
- * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
- * subtract the result from 31 to convert the MSB count into an LSB count.
- */
-
- /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
- emit(MOV(this->result, temp));
- emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ));
-
- temp.negate = true;
- inst = emit(ADD(this->result, temp, fs_reg(31)));
- inst->predicate = BRW_PREDICATE_NORMAL;
- break;
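/* Reference for the FBH fix-up above (a sketch, not driver code): FBH counts
 * from the MSB side and returns 0xFFFFFFFF when no bit is set, while GLSL's
 * findMSB() wants the bit index from the LSB side, so the predicated ADD
 * computes 31 - count and leaves -1 untouched.
 */
static int find_msb_ref(unsigned v)
{
   for (int i = 31; i >= 0; i--)
      if (v & (1u << i))
         return i;
   return -1;
}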
- case ir_unop_find_lsb:
- emit(FBL(this->result, op[0]));
- break;
- case ir_unop_saturate:
- inst = emit(MOV(this->result, op[0]));
- inst->saturate = true;
- break;
- case ir_triop_bitfield_extract:
- /* Note that the instruction's argument order is reversed from GLSL
- * and the IR.
- */
- emit(BFE(this->result, op[2], op[1], op[0]));
- break;
- case ir_binop_bfm:
- emit(BFI1(this->result, op[0], op[1]));
- break;
- case ir_triop_bfi:
- emit(BFI2(this->result, op[0], op[1], op[2]));
- break;
- case ir_quadop_bitfield_insert:
- unreachable("not reached: should be handled by "
- "lower_instructions::bitfield_insert_to_bfm_bfi");
-
- case ir_unop_bit_not:
- emit(NOT(this->result, op[0]));
- break;
- case ir_binop_bit_and:
- emit(AND(this->result, op[0], op[1]));
- break;
- case ir_binop_bit_xor:
- emit(XOR(this->result, op[0], op[1]));
- break;
- case ir_binop_bit_or:
- emit(OR(this->result, op[0], op[1]));
- break;
-
- case ir_binop_lshift:
- emit(SHL(this->result, op[0], op[1]));
- break;
-
- case ir_binop_rshift:
- if (ir->type->base_type == GLSL_TYPE_INT)
- emit(ASR(this->result, op[0], op[1]));
- else
- emit(SHR(this->result, op[0], op[1]));
- break;
- case ir_binop_pack_half_2x16_split:
- emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]);
- break;
- case ir_binop_ubo_load: {
- /* This IR node takes a constant uniform block and a constant or
- * variable byte offset within the block and loads a vector from that.
- */
- ir_constant *const_uniform_block = ir->operands[0]->as_constant();
- ir_constant *const_offset = ir->operands[1]->as_constant();
- fs_reg surf_index;
- uint32_t binding, set, index, set_index;
-
- if (const_uniform_block) {
- /* The block index is a constant, so just emit the binding table entry
- * as an immediate.
- */
- index = const_uniform_block->value.u[0];
- set = shader->base.UniformBlocks[index].Set;
- set_index = shader->base.UniformBlocks[index].Binding;
- binding = stage_prog_data->bind_map[set].index[set_index];
- surf_index = fs_reg(binding);
- } else {
- assert(0 && "need more info from the ir for this.");
-
- /* The block index is not a constant. Evaluate the index expression
- * per-channel and add the base UBO index; we have to select a value
- * from any live channel.
- */
- surf_index = vgrf(glsl_type::uint_type);
- emit(ADD(surf_index, op[0],
- fs_reg(stage_prog_data->binding_table.ubo_start)));
- emit_uniformize(surf_index, surf_index);
-
- /* Assume this may touch any UBO. It would be nice to provide
- * a tighter bound, but the array information is already lowered away.
- */
- brw_mark_surface_used(prog_data,
- stage_prog_data->binding_table.ubo_start +
- shader_prog->NumUniformBlocks - 1);
- }
-
- if (const_offset) {
- fs_reg packed_consts = vgrf(glsl_type::float_type);
- packed_consts.type = result.type;
-
- fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15);
- emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8,
- packed_consts, surf_index, const_offset_reg));
-
- for (int i = 0; i < ir->type->vector_elements; i++) {
- packed_consts.set_smear(const_offset->value.u[0] % 16 / 4 + i);
-
- /* The std140 packing rules don't allow vectors to cross 16-byte
- * boundaries, and a reg is 32 bytes.
- */
- assert(packed_consts.subreg_offset < 32);
-
- /* UBO bools are any nonzero value. We consider bools to be
- * values with the low bit set to 1. Convert them using CMP.
- */
- if (ir->type->base_type == GLSL_TYPE_BOOL) {
- emit(CMP(result, packed_consts, fs_reg(0u), BRW_CONDITIONAL_NZ));
- } else {
- emit(MOV(result, packed_consts));
- }
-
- result = offset(result, 1);
- }
- } else {
- /* Turn the byte offset into a dword offset. */
- fs_reg base_offset = vgrf(glsl_type::int_type);
- emit(SHR(base_offset, op[1], fs_reg(2)));
-
- for (int i = 0; i < ir->type->vector_elements; i++) {
- emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index,
- base_offset, i));
-
- if (ir->type->base_type == GLSL_TYPE_BOOL)
- emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ));
-
- result = offset(result, 1);
- }
- }
-
- result.reg_offset = 0;
- break;
- }
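/* Sketch of the constant-offset math in the UBO load above
 * (ubo_smear_index() is a hypothetical helper, not driver code): the
 * pull-constant load fetches the 16-byte-aligned block containing the byte
 * offset, and set_smear() then selects the dword within that block for each
 * vector component.
 */
static unsigned ubo_smear_index(unsigned byte_offset, unsigned component)
{
   return byte_offset % 16 / 4 + component;
}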
-
- case ir_triop_fma:
- /* Note that the instruction's argument order is reversed from GLSL
- * and the IR.
- */
- emit(MAD(this->result, op[2], op[1], op[0]));
- break;
-
- case ir_triop_lrp:
- emit_lrp(this->result, op[0], op[1], op[2]);
- break;
-
- case ir_triop_csel:
- case ir_unop_interpolate_at_centroid:
- case ir_binop_interpolate_at_offset:
- case ir_binop_interpolate_at_sample:
- unreachable("already handled above");
- break;
-
- case ir_unop_d2f:
- case ir_unop_f2d:
- case ir_unop_d2i:
- case ir_unop_i2d:
- case ir_unop_d2u:
- case ir_unop_u2d:
- case ir_unop_d2b:
- case ir_unop_pack_double_2x32:
- case ir_unop_unpack_double_2x32:
- case ir_unop_frexp_sig:
- case ir_unop_frexp_exp:
- unreachable("fp64 todo");
- break;
- }
-}
-
-void
-fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
- const glsl_type *type, bool predicated)
-{
- switch (type->base_type) {
- case GLSL_TYPE_FLOAT:
- case GLSL_TYPE_UINT:
- case GLSL_TYPE_INT:
- case GLSL_TYPE_BOOL:
- for (unsigned int i = 0; i < type->components(); i++) {
- l.type = brw_type_for_base_type(type);
- r.type = brw_type_for_base_type(type);
-
- if (predicated || !l.equals(r)) {
- fs_inst *inst = emit(MOV(l, r));
- inst->predicate = predicated ? BRW_PREDICATE_NORMAL : BRW_PREDICATE_NONE;
- }
-
- l = offset(l, 1);
- r = offset(r, 1);
- }
- break;
- case GLSL_TYPE_ARRAY:
- for (unsigned int i = 0; i < type->length; i++) {
- emit_assignment_writes(l, r, type->fields.array, predicated);
- }
- break;
-
- case GLSL_TYPE_STRUCT:
- for (unsigned int i = 0; i < type->length; i++) {
- emit_assignment_writes(l, r, type->fields.structure[i].type,
- predicated);
- }
- break;
-
- case GLSL_TYPE_SAMPLER:
- case GLSL_TYPE_IMAGE:
- case GLSL_TYPE_ATOMIC_UINT:
- break;
-
- case GLSL_TYPE_DOUBLE:
- case GLSL_TYPE_VOID:
- case GLSL_TYPE_ERROR:
- case GLSL_TYPE_INTERFACE:
- case GLSL_TYPE_FUNCTION:
- unreachable("not reached");
- }
-}
-
-/* If the RHS processing resulted in an instruction generating a
- * temporary value, and it would be easy to rewrite the instruction to
- * generate its result right into the LHS instead, do so. This ends
- * up reliably removing instructions where it can be tricky to do so
- * later without real UD chain information.
- */
-bool
-fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
- fs_reg dst,
- fs_reg src,
- fs_inst *pre_rhs_inst,
- fs_inst *last_rhs_inst)
-{
- /* Only attempt if we're doing a direct assignment. */
- if (ir->condition ||
- !(ir->lhs->type->is_scalar() ||
- (ir->lhs->type->is_vector() &&
- ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1)))
- return false;
-
- /* Make sure the last instruction generated our source reg. */
- fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst,
- last_rhs_inst,
- src);
- if (!modify)
- return false;
-
- /* If last_rhs_inst wrote a different number of components than our LHS,
- * we can't safely rewrite it.
- */
- if (alloc.sizes[dst.reg] != modify->regs_written)
- return false;
-
- /* Success! Rewrite the instruction. */
- modify->dst = dst;
-
- return true;
-}
-
-void
-fs_visitor::visit(ir_assignment *ir)
-{
- fs_reg l, r;
- fs_inst *inst;
-
- /* FINISHME: arrays on the lhs */
- ir->lhs->accept(this);
- l = this->result;
-
- fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail();
-
- ir->rhs->accept(this);
- r = this->result;
-
- fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail();
-
- assert(l.file != BAD_FILE);
- assert(r.file != BAD_FILE);
-
- if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst))
- return;
-
- if (ir->condition) {
- emit_bool_to_cond_code(ir->condition);
- }
-
- if (ir->lhs->type->is_scalar() ||
- ir->lhs->type->is_vector()) {
- for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
- if (ir->write_mask & (1 << i)) {
- inst = emit(MOV(l, r));
- if (ir->condition)
- inst->predicate = BRW_PREDICATE_NORMAL;
- r = offset(r, 1);
- }
- l = offset(l, 1);
- }
- } else {
- emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
- }
-}
-
fs_inst *
fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
fs_reg coordinate, int coord_components,
@@ -1458,7 +94,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
if (shadow_c.file != BAD_FILE) {
for (int i = 0; i < coord_components; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate);
coordinate = offset(coordinate, 1);
}
@@ -1466,7 +102,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
* the unused slots must be zeroed.
*/
for (int i = coord_components; i < 3; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f));
}
mlen += 3;
@@ -1474,25 +110,25 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
/* There's no plain shadow compare message, so we use shadow
* compare with a bias of 0.0.
*/
- emit(MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f));
mlen++;
} else if (op == ir_txb || op == ir_txl) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen), lod);
mlen++;
} else {
unreachable("Should not get here.");
}
- emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen), shadow_c);
mlen++;
} else if (op == ir_tex) {
for (int i = 0; i < coord_components; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate);
coordinate = offset(coordinate, 1);
}
/* zero the others. */
for (int i = coord_components; i<3; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f));
}
/* gen4's SIMD8 sampler always has the slots for u,v,r present. */
mlen += 3;
@@ -1500,7 +136,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
fs_reg &dPdx = lod;
for (int i = 0; i < coord_components; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate);
coordinate = offset(coordinate, 1);
}
/* the slots for u and v are always present, but r is optional */
@@ -1521,20 +157,20 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
* m5 m6 m7 m8 m9 m10
*/
for (int i = 0; i < grad_components; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdx));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdx);
dPdx = offset(dPdx, 1);
}
mlen += MAX2(grad_components, 2);
for (int i = 0; i < grad_components; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdy));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdy);
dPdy = offset(dPdy, 1);
}
mlen += MAX2(grad_components, 2);
} else if (op == ir_txs) {
/* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */
simd16 = true;
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod);
mlen += 2;
} else {
/* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod
@@ -1544,8 +180,8 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
assert(op == ir_txb || op == ir_txl || op == ir_txf);
for (int i = 0; i < coord_components; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type),
- coordinate));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type),
+ coordinate);
coordinate = offset(coordinate, 1);
}
@@ -1553,13 +189,13 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
* be necessary for TXF (ld), but seems wise to do for all messages.
*/
for (int i = coord_components; i < 3; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f));
}
/* lod/bias appears after u/v/r. */
mlen += 6;
- emit(MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod));
+ bld.MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod);
mlen++;
/* The unused upper half. */
@@ -1587,7 +223,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
unreachable("not reached");
}
- fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler));
+ fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler));
inst->base_mrf = base_mrf;
inst->mlen = mlen;
inst->header_size = 1;
@@ -1595,7 +231,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
if (simd16) {
for (int i = 0; i < 4; i++) {
- emit(MOV(orig_dst, dst));
+ bld.MOV(orig_dst, dst);
orig_dst = offset(orig_dst, 1);
dst = offset(dst, 2);
}
@@ -1621,7 +257,7 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
/* Copy the coordinates. */
for (int i = 0; i < vector_elements; i++) {
- emit(MOV(retype(offset(message, i), coordinate.type), coordinate));
+ bld.MOV(retype(offset(message, i), coordinate.type), coordinate);
coordinate = offset(coordinate, 1);
}
@@ -1630,20 +266,20 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
/* Messages other than sample and ld require all three components */
if (has_lod || shadow_c.file != BAD_FILE) {
for (int i = vector_elements; i < 3; i++) {
- emit(MOV(offset(message, i), fs_reg(0.0f)));
+ bld.MOV(offset(message, i), fs_reg(0.0f));
}
}
if (has_lod) {
fs_reg msg_lod = retype(offset(message, 3), op == ir_txf ?
BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F);
- emit(MOV(msg_lod, lod));
+ bld.MOV(msg_lod, lod);
msg_end = offset(msg_lod, 1);
}
if (shadow_c.file != BAD_FILE) {
fs_reg msg_ref = offset(message, 3 + has_lod);
- emit(MOV(msg_ref, shadow_c));
+ bld.MOV(msg_ref, shadow_c);
msg_end = offset(msg_ref, 1);
}
@@ -1658,7 +294,7 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
default: unreachable("not reached");
}
- fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler));
+ fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler));
inst->base_mrf = message.reg - 1;
inst->mlen = msg_end.reg - inst->base_mrf;
inst->header_size = 1;
@@ -1698,7 +334,7 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
}
for (int i = 0; i < vector_elements; i++) {
- emit(MOV(retype(offset(msg_coords, i), coordinate.type), coordinate));
+ bld.MOV(retype(offset(msg_coords, i), coordinate.type), coordinate);
coordinate = offset(coordinate, 1);
}
fs_reg msg_end = offset(msg_coords, vector_elements);
@@ -1706,7 +342,7 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
if (shadow_c.file != BAD_FILE) {
fs_reg msg_shadow = msg_lod;
- emit(MOV(msg_shadow, shadow_c));
+ bld.MOV(msg_shadow, shadow_c);
msg_lod = offset(msg_shadow, 1);
msg_end = msg_lod;
}
@@ -1717,13 +353,13 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
opcode = SHADER_OPCODE_TEX;
break;
case ir_txb:
- emit(MOV(msg_lod, lod));
+ bld.MOV(msg_lod, lod);
msg_end = offset(msg_lod, 1);
opcode = FS_OPCODE_TXB;
break;
case ir_txl:
- emit(MOV(msg_lod, lod));
+ bld.MOV(msg_lod, lod);
msg_end = offset(msg_lod, 1);
opcode = SHADER_OPCODE_TXL;
@@ -1740,11 +376,11 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
*/
msg_end = msg_lod;
for (int i = 0; i < grad_components; i++) {
- emit(MOV(msg_end, lod));
+ bld.MOV(msg_end, lod);
lod = offset(lod, 1);
msg_end = offset(msg_end, 1);
- emit(MOV(msg_end, lod2));
+ bld.MOV(msg_end, lod2);
lod2 = offset(lod2, 1);
msg_end = offset(msg_end, 1);
}
@@ -1754,21 +390,21 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
}
case ir_txs:
msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD);
- emit(MOV(msg_lod, lod));
+ bld.MOV(msg_lod, lod);
msg_end = offset(msg_lod, 1);
opcode = SHADER_OPCODE_TXS;
break;
case ir_query_levels:
msg_lod = msg_end;
- emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+ bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u));
msg_end = offset(msg_lod, 1);
opcode = SHADER_OPCODE_TXS;
break;
case ir_txf:
msg_lod = offset(msg_coords, 3);
- emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod));
+ bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod);
msg_end = offset(msg_lod, 1);
opcode = SHADER_OPCODE_TXF;
@@ -1776,9 +412,9 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
case ir_txf_ms:
msg_lod = offset(msg_coords, 3);
/* lod */
- emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+ bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u));
/* sample index */
- emit(MOV(retype(offset(msg_lod, 1), BRW_REGISTER_TYPE_UD), sample_index));
+ bld.MOV(retype(offset(msg_lod, 1), BRW_REGISTER_TYPE_UD), sample_index);
msg_end = offset(msg_lod, 2);
opcode = SHADER_OPCODE_TXF_CMS;
@@ -1793,7 +429,7 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
unreachable("not reached");
}
- fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler));
+ fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler));
inst->base_mrf = message.reg;
inst->mlen = msg_end.reg - message.reg;
inst->header_size = header_size;
@@ -1851,7 +487,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
}
if (shadow_c.file != BAD_FILE) {
- emit(MOV(sources[length], shadow_c));
+ bld.MOV(sources[length], shadow_c);
length++;
}
@@ -1874,11 +510,11 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
case ir_lod:
break;
case ir_txb:
- emit(MOV(sources[length], lod));
+ bld.MOV(sources[length], lod);
length++;
break;
case ir_txl:
- emit(MOV(sources[length], lod));
+ bld.MOV(sources[length], lod);
length++;
break;
case ir_txd: {
@@ -1888,7 +524,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
* [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
*/
for (int i = 0; i < coord_components; i++) {
- emit(MOV(sources[length], coordinate));
+ bld.MOV(sources[length], coordinate);
coordinate = offset(coordinate, 1);
length++;
@@ -1896,11 +532,11 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
* only derivatives for (u, v, r).
*/
if (i < grad_components) {
- emit(MOV(sources[length], lod));
+ bld.MOV(sources[length], lod);
lod = offset(lod, 1);
length++;
- emit(MOV(sources[length], lod2));
+ bld.MOV(sources[length], lod2);
lod2 = offset(lod2, 1);
length++;
}
@@ -1910,11 +546,11 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
break;
}
case ir_txs:
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod);
length++;
break;
case ir_query_levels:
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u));
length++;
break;
case ir_txf:
@@ -1922,23 +558,23 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
* On Gen9 they are u, v, lod, r
*/
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
coordinate = offset(coordinate, 1);
length++;
if (devinfo->gen >= 9) {
if (coord_components >= 2) {
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
coordinate = offset(coordinate, 1);
}
length++;
}
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod);
length++;
for (int i = devinfo->gen >= 9 ? 2 : 1; i < coord_components; i++) {
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
coordinate = offset(coordinate, 1);
length++;
}
@@ -1946,18 +582,18 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
coordinate_done = true;
break;
case ir_txf_ms:
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index);
length++;
/* data from the multisample control surface */
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs);
length++;
/* there is no offsetting for this message; just copy in the integer
* texture coordinates
*/
for (int i = 0; i < coord_components; i++) {
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
coordinate = offset(coordinate, 1);
length++;
}
@@ -1971,19 +607,19 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
/* More crazy intermixing */
for (int i = 0; i < 2; i++) { /* u, v */
- emit(MOV(sources[length], coordinate));
+ bld.MOV(sources[length], coordinate);
coordinate = offset(coordinate, 1);
length++;
}
for (int i = 0; i < 2; i++) { /* offu, offv */
- emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value));
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value);
offset_value = offset(offset_value, 1);
length++;
}
if (coord_components == 3) { /* r if present */
- emit(MOV(sources[length], coordinate));
+ bld.MOV(sources[length], coordinate);
coordinate = offset(coordinate, 1);
length++;
}
@@ -1996,7 +632,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
/* Set up the coordinate (except for cases where it was done above) */
if (!coordinate_done) {
for (int i = 0; i < coord_components; i++) {
- emit(MOV(sources[length], coordinate));
+ bld.MOV(sources[length], coordinate);
coordinate = offset(coordinate, 1);
length++;
}
@@ -2010,7 +646,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
BRW_REGISTER_TYPE_F, dispatch_width);
- emit(LOAD_PAYLOAD(src_payload, sources, length, header_size));
+ bld.LOAD_PAYLOAD(src_payload, sources, length, header_size);
/* Generate the SEND */
enum opcode opcode;
@@ -2033,7 +669,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
default:
unreachable("not reached");
}
- fs_inst *inst = emit(opcode, dst, src_payload, sampler);
+ fs_inst *inst = bld.emit(opcode, dst, src_payload, sampler);
inst->base_mrf = -1;
inst->mlen = mlen;
inst->header_size = header_size;
@@ -2051,7 +687,6 @@ fs_reg
fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
bool is_rect, uint32_t sampler, int texunit)
{
- fs_inst *inst = NULL;
bool needs_gl_clamp = true;
fs_reg scale_x, scale_y;
@@ -2110,10 +745,10 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
fs_reg src = coordinate;
coordinate = dst;
- emit(MUL(dst, src, scale_x));
+ bld.MUL(dst, src, scale_x);
dst = offset(dst, 1);
src = offset(src, 1);
- emit(MUL(dst, src, scale_y));
+ bld.MUL(dst, src, scale_y);
} else if (is_rect) {
/* On gen6+, the sampler handles the rectangle coordinates
* natively, without needing rescaling. But that means we have
@@ -2127,8 +762,8 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
fs_reg chan = coordinate;
chan = offset(chan, i);
- inst = emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f));
- inst->conditional_mod = BRW_CONDITIONAL_GE;
+ set_condmod(BRW_CONDITIONAL_GE,
+ bld.emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f)));
/* Our parameter comes in as 1.0/width or 1.0/height,
* because that's what people normally want for doing
@@ -2137,11 +772,11 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
* parameter type, so just invert back.
*/
fs_reg limit = vgrf(glsl_type::float_type);
- emit(MOV(limit, i == 0 ? scale_x : scale_y));
- emit(SHADER_OPCODE_RCP, limit, limit);
+ bld.MOV(limit, i == 0 ? scale_x : scale_y);
+ bld.emit(SHADER_OPCODE_RCP, limit, limit);
- inst = emit(BRW_OPCODE_SEL, chan, chan, limit);
- inst->conditional_mod = BRW_CONDITIONAL_L;
+ set_condmod(BRW_CONDITIONAL_L,
+ bld.emit(BRW_OPCODE_SEL, chan, chan, limit));
}
}
}
@@ -2151,9 +786,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
if (key_tex->gl_clamp_mask[i] & (1 << sampler)) {
fs_reg chan = coordinate;
chan = offset(chan, i);
-
- fs_inst *inst = emit(MOV(chan, chan));
- inst->saturate = true;
+ set_saturate(true, bld.MOV(chan, chan));
}
}
}
@@ -2173,13 +806,13 @@ fs_visitor::emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler)
/* parameters are: u, v, r; missing parameters are treated as zero */
for (int i = 0; i < components; i++) {
sources[i] = vgrf(glsl_type::float_type);
- emit(MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate));
+ bld.MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate);
coordinate = offset(coordinate, 1);
}
- emit(LOAD_PAYLOAD(payload, sources, components, 0));
+ bld.LOAD_PAYLOAD(payload, sources, components, 0);
- fs_inst *inst = emit(SHADER_OPCODE_TXF_MCS, dest, payload, sampler);
+ fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS, dest, payload, sampler);
inst->base_mrf = -1;
inst->mlen = components * reg_width;
inst->header_size = 0;
@@ -2219,7 +852,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
this->result = res;
for (int i=0; i<4; i++) {
- emit(MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f)));
+ bld.MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f));
res = offset(res, 1);
}
return;
@@ -2276,7 +909,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
if (op == ir_txs && is_cube_array) {
fs_reg depth = offset(dst, 2);
fs_reg fixed_depth = vgrf(glsl_type::int_type);
- emit_math(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6));
+ bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6));
fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written);
int components = inst->regs_written / (dst.width / 8);
@@ -2287,167 +920,12 @@ fs_visitor::emit_texture(ir_texture_opcode op,
fixed_payload[i] = offset(dst, i);
}
}
- emit(LOAD_PAYLOAD(dst, fixed_payload, components, 0));
+ bld.LOAD_PAYLOAD(dst, fixed_payload, components, 0);
}
swizzle_result(op, dest_type->vector_elements, dst, sampler);
}
-void
-fs_visitor::visit(ir_texture *ir)
-{
- uint32_t sampler;
-
- ir_dereference_variable *deref_var = ir->sampler->as_dereference_variable();
- assert(deref_var);
- ir_variable *var = deref_var->var;
-
- sampler = stage_prog_data->bind_map[var->data.set].index[var->data.index];
-
- ir_rvalue *nonconst_sampler_index =
- _mesa_get_sampler_array_nonconst_index(ir->sampler);
-
- /* Handle non-constant sampler array indexing */
- fs_reg sampler_reg;
- if (nonconst_sampler_index) {
- /* The highest sampler which may be used by this operation is
- * the last element of the array. Mark it here, because the generator
- * doesn't have enough information to determine the bound.
- */
- uint32_t array_size = ir->sampler->as_dereference_array()
- ->array->type->array_size();
-
- uint32_t max_used = sampler + array_size - 1;
- if (ir->op == ir_tg4 && devinfo->gen < 8) {
- max_used += stage_prog_data->binding_table.gather_texture_start;
- } else {
- max_used += stage_prog_data->binding_table.texture_start;
- }
-
- brw_mark_surface_used(prog_data, max_used);
-
- /* Emit code to evaluate the actual indexing expression */
- nonconst_sampler_index->accept(this);
- fs_reg temp = vgrf(glsl_type::uint_type);
- emit(ADD(temp, this->result, fs_reg(sampler)));
- emit_uniformize(temp, temp);
-
- sampler_reg = temp;
- } else {
- /* Single sampler, or constant array index; the indexing expression
- * is just an immediate.
- */
- sampler_reg = fs_reg(sampler);
- }
-
- /* FINISHME: We're failing to recompile our programs when the sampler is
- * updated. This only matters for the texture rectangle scale parameters
- * (pre-gen6, or gen6+ with GL_CLAMP).
- */
- int texunit = prog->SamplerUnits[sampler];
-
- /* Should be lowered by do_lower_texture_projection */
- assert(!ir->projector);
-
- /* Should be lowered */
- assert(!ir->offset || !ir->offset->type->is_array());
-
- /* Generate code to compute all the subexpression trees. This has to be
- * done before loading any values into MRFs for the sampler message since
- * generating these values may involve SEND messages that need the MRFs.
- */
- fs_reg coordinate;
- int coord_components = 0;
- if (ir->coordinate) {
- coord_components = ir->coordinate->type->vector_elements;
- ir->coordinate->accept(this);
- coordinate = this->result;
- }
-
- fs_reg shadow_comparitor;
- if (ir->shadow_comparitor) {
- ir->shadow_comparitor->accept(this);
- shadow_comparitor = this->result;
- }
-
- fs_reg offset_value;
- if (ir->offset) {
- ir_constant *const_offset = ir->offset->as_constant();
- if (const_offset) {
- /* Store the header bitfield in an IMM register. This allows us to
- * use offset_value.file to distinguish between no offset, a constant
- * offset, and a non-constant offset.
- */
- offset_value =
- fs_reg(brw_texture_offset(const_offset->value.i,
- const_offset->type->vector_elements));
- } else {
- ir->offset->accept(this);
- offset_value = this->result;
- }
- }
-
- fs_reg lod, lod2, sample_index, mcs;
- int grad_components = 0;
- switch (ir->op) {
- case ir_tex:
- case ir_lod:
- case ir_tg4:
- case ir_query_levels:
- break;
- case ir_txb:
- ir->lod_info.bias->accept(this);
- lod = this->result;
- break;
- case ir_txd:
- ir->lod_info.grad.dPdx->accept(this);
- lod = this->result;
-
- ir->lod_info.grad.dPdy->accept(this);
- lod2 = this->result;
-
- grad_components = ir->lod_info.grad.dPdx->type->vector_elements;
- break;
- case ir_txf:
- case ir_txl:
- case ir_txs:
- ir->lod_info.lod->accept(this);
- lod = this->result;
- break;
- case ir_txf_ms:
- ir->lod_info.sample_index->accept(this);
- sample_index = this->result;
-
- if (devinfo->gen >= 7 &&
- key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
- mcs = emit_mcs_fetch(coordinate, ir->coordinate->type->vector_elements,
- sampler_reg);
- } else {
- mcs = fs_reg(0u);
- }
- break;
- default:
- unreachable("Unrecognized texture opcode");
- };
-
- int gather_component = 0;
- if (ir->op == ir_tg4)
- gather_component = ir->lod_info.component->as_constant()->value.i[0];
-
- bool is_rect =
- ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT;
-
- bool is_cube_array =
- ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
- ir->sampler->type->sampler_array;
-
- emit_texture(ir->op, ir->type, coordinate, coord_components,
- shadow_comparitor, lod, lod2, grad_components,
- sample_index, offset_value, mcs,
- gather_component, is_cube_array, is_rect, sampler,
- sampler_reg, texunit);
-}
-
/**
* Apply workarounds for Gen6 gather with UINT/SINT
*/
@@ -2462,16 +940,16 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst)
for (int i = 0; i < 4; i++) {
fs_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F);
/* Convert from UNORM to UINT */
- emit(MUL(dst_f, dst_f, fs_reg((float)((1 << width) - 1))));
- emit(MOV(dst, dst_f));
+ bld.MUL(dst_f, dst_f, fs_reg((float)((1 << width) - 1)));
+ bld.MOV(dst, dst_f);
if (wa & WA_SIGN) {
/* Reinterpret the UINT value as a signed INT value by
* shifting the sign bit into place, then shifting back
* preserving sign.
*/
- emit(SHL(dst, dst, fs_reg(32 - width)));
- emit(ASR(dst, dst, fs_reg(32 - width)));
+ bld.SHL(dst, dst, fs_reg(32 - width));
+ bld.ASR(dst, dst, fs_reg(32 - width));
}
dst = offset(dst, 1);
@@ -2535,461 +1013,18 @@ fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components,
l = offset(l, i);
if (swiz == SWIZZLE_ZERO) {
- emit(MOV(l, fs_reg(0.0f)));
+ bld.MOV(l, fs_reg(0.0f));
} else if (swiz == SWIZZLE_ONE) {
- emit(MOV(l, fs_reg(1.0f)));
+ bld.MOV(l, fs_reg(1.0f));
} else {
- emit(MOV(l, offset(orig_val,
- GET_SWZ(key_tex->swizzles[sampler], i))));
+ bld.MOV(l, offset(orig_val,
+ GET_SWZ(key_tex->swizzles[sampler], i)));
}
}
this->result = swizzled_result;
}
}
-void
-fs_visitor::visit(ir_swizzle *ir)
-{
- ir->val->accept(this);
- fs_reg val = this->result;
-
- if (ir->type->vector_elements == 1) {
- this->result = offset(this->result, ir->mask.x);
- return;
- }
-
- fs_reg result = vgrf(ir->type);
- this->result = result;
-
- for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
- fs_reg channel = val;
- int swiz = 0;
-
- switch (i) {
- case 0:
- swiz = ir->mask.x;
- break;
- case 1:
- swiz = ir->mask.y;
- break;
- case 2:
- swiz = ir->mask.z;
- break;
- case 3:
- swiz = ir->mask.w;
- break;
- }
-
- emit(MOV(result, offset(channel, swiz)));
- result = offset(result, 1);
- }
-}
-
-void
-fs_visitor::visit(ir_discard *ir)
-{
- /* We track our discarded pixels in f0.1. By predicating on it, we can
- * update just the flag bits that aren't yet discarded. If there's no
- * condition, we emit a CMP of g0 != g0, so all currently executing
- * channels will get turned off.
- */
- fs_inst *cmp;
- if (ir->condition) {
- emit_bool_to_cond_code(ir->condition);
- cmp = (fs_inst *) this->instructions.get_tail();
- cmp->conditional_mod = brw_negate_cmod(cmp->conditional_mod);
- } else {
- fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
- BRW_REGISTER_TYPE_UW));
- cmp = emit(CMP(reg_null_f, some_reg, some_reg, BRW_CONDITIONAL_NZ));
- }
- cmp->predicate = BRW_PREDICATE_NORMAL;
- cmp->flag_subreg = 1;
-
- if (devinfo->gen >= 6) {
- emit_discard_jump();
- }
-}
-
-void
-fs_visitor::visit(ir_constant *ir)
-{
- /* Set this->result to reg at the bottom of the function because some code
- * paths will cause this visitor to be applied to other fields. This will
- * cause the value stored in this->result to be modified.
- *
- * Make reg constant so that it doesn't get accidentally modified along the
- * way. Yes, I actually had this problem. :(
- */
- const fs_reg reg = vgrf(ir->type);
- fs_reg dst_reg = reg;
-
- if (ir->type->is_array()) {
- const unsigned size = type_size(ir->type->fields.array);
-
- for (unsigned i = 0; i < ir->type->length; i++) {
- ir->array_elements[i]->accept(this);
- fs_reg src_reg = this->result;
-
- dst_reg.type = src_reg.type;
- for (unsigned j = 0; j < size; j++) {
- emit(MOV(dst_reg, src_reg));
- src_reg = offset(src_reg, 1);
- dst_reg = offset(dst_reg, 1);
- }
- }
- } else if (ir->type->is_record()) {
- foreach_in_list(ir_constant, field, &ir->components) {
- const unsigned size = type_size(field->type);
-
- field->accept(this);
- fs_reg src_reg = this->result;
-
- dst_reg.type = src_reg.type;
- for (unsigned j = 0; j < size; j++) {
- emit(MOV(dst_reg, src_reg));
- src_reg = offset(src_reg, 1);
- dst_reg = offset(dst_reg, 1);
- }
- }
- } else {
- const unsigned size = type_size(ir->type);
-
- for (unsigned i = 0; i < size; i++) {
- switch (ir->type->base_type) {
- case GLSL_TYPE_FLOAT:
- emit(MOV(dst_reg, fs_reg(ir->value.f[i])));
- break;
- case GLSL_TYPE_UINT:
- emit(MOV(dst_reg, fs_reg(ir->value.u[i])));
- break;
- case GLSL_TYPE_INT:
- emit(MOV(dst_reg, fs_reg(ir->value.i[i])));
- break;
- case GLSL_TYPE_BOOL:
- emit(MOV(dst_reg, fs_reg(ir->value.b[i] != 0 ? ~0 : 0)));
- break;
- default:
- unreachable("Non-float/uint/int/bool constant");
- }
- dst_reg = offset(dst_reg, 1);
- }
- }
-
- this->result = reg;
-}
-
-void
-fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
-{
- ir_expression *expr = ir->as_expression();
-
- if (!expr || expr->operation == ir_binop_ubo_load) {
- ir->accept(this);
-
- fs_inst *inst = emit(AND(reg_null_d, this->result, fs_reg(1)));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- return;
- }
-
- fs_reg op[3];
-
- assert(expr->get_num_operands() <= 3);
- for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
- assert(expr->operands[i]->type->is_scalar());
-
- expr->operands[i]->accept(this);
- op[i] = this->result;
-
- resolve_ud_negate(&op[i]);
- }
-
- emit_bool_to_cond_code_of_reg(expr, op);
-}
-
-void
-fs_visitor::emit_bool_to_cond_code_of_reg(ir_expression *expr, fs_reg op[3])
-{
- fs_inst *inst;
-
- switch (expr->operation) {
- case ir_unop_logic_not:
- inst = emit(AND(reg_null_d, op[0], fs_reg(1)));
- inst->conditional_mod = BRW_CONDITIONAL_Z;
- break;
-
- case ir_binop_logic_xor:
- if (devinfo->gen <= 5) {
- fs_reg temp = vgrf(expr->type);
- emit(XOR(temp, op[0], op[1]));
- inst = emit(AND(reg_null_d, temp, fs_reg(1)));
- } else {
- inst = emit(XOR(reg_null_d, op[0], op[1]));
- }
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
-
- case ir_binop_logic_or:
- if (devinfo->gen <= 5) {
- fs_reg temp = vgrf(expr->type);
- emit(OR(temp, op[0], op[1]));
- inst = emit(AND(reg_null_d, temp, fs_reg(1)));
- } else {
- inst = emit(OR(reg_null_d, op[0], op[1]));
- }
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
-
- case ir_binop_logic_and:
- if (devinfo->gen <= 5) {
- fs_reg temp = vgrf(expr->type);
- emit(AND(temp, op[0], op[1]));
- inst = emit(AND(reg_null_d, temp, fs_reg(1)));
- } else {
- inst = emit(AND(reg_null_d, op[0], op[1]));
- }
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
-
- case ir_unop_f2b:
- if (devinfo->gen >= 6) {
- emit(CMP(reg_null_d, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
- } else {
- inst = emit(MOV(reg_null_f, op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- }
- break;
-
- case ir_unop_i2b:
- if (devinfo->gen >= 6) {
- emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
- } else {
- inst = emit(MOV(reg_null_d, op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- }
- break;
-
- case ir_binop_greater:
- case ir_binop_gequal:
- case ir_binop_less:
- case ir_binop_lequal:
- case ir_binop_equal:
- case ir_binop_all_equal:
- case ir_binop_nequal:
- case ir_binop_any_nequal:
- if (devinfo->gen <= 5) {
- resolve_bool_comparison(expr->operands[0], &op[0]);
- resolve_bool_comparison(expr->operands[1], &op[1]);
- }
-
- emit(CMP(reg_null_d, op[0], op[1],
- brw_conditional_for_comparison(expr->operation)));
- break;
-
- case ir_triop_csel: {
- /* Expand the boolean condition into the flag register. */
- inst = emit(MOV(reg_null_d, op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
- /* Select which boolean to return. */
- fs_reg temp = vgrf(expr->operands[1]->type);
- inst = emit(SEL(temp, op[1], op[2]));
- inst->predicate = BRW_PREDICATE_NORMAL;
-
- /* Expand the result to a condition code. */
- inst = emit(MOV(reg_null_d, temp));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
- }
-
- default:
- unreachable("not reached");
- }
-}
-
-/**
- * Emit a gen6 IF statement with the comparison folded into the IF
- * instruction.
- */
-void
-fs_visitor::emit_if_gen6(ir_if *ir)
-{
- ir_expression *expr = ir->condition->as_expression();
-
- if (expr && expr->operation != ir_binop_ubo_load) {
- fs_reg op[3];
- fs_inst *inst;
- fs_reg temp;
-
- assert(expr->get_num_operands() <= 3);
- for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
- assert(expr->operands[i]->type->is_scalar());
-
- expr->operands[i]->accept(this);
- op[i] = this->result;
- }
-
- switch (expr->operation) {
- case ir_unop_logic_not:
- emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_Z));
- return;
-
- case ir_binop_logic_xor:
- emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
- return;
-
- case ir_binop_logic_or:
- temp = vgrf(glsl_type::bool_type);
- emit(OR(temp, op[0], op[1]));
- emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ));
- return;
-
- case ir_binop_logic_and:
- temp = vgrf(glsl_type::bool_type);
- emit(AND(temp, op[0], op[1]));
- emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ));
- return;
-
- case ir_unop_f2b:
- inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- return;
-
- case ir_unop_i2b:
- emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
- return;
-
- case ir_binop_greater:
- case ir_binop_gequal:
- case ir_binop_less:
- case ir_binop_lequal:
- case ir_binop_equal:
- case ir_binop_all_equal:
- case ir_binop_nequal:
- case ir_binop_any_nequal:
- if (devinfo->gen <= 5) {
- resolve_bool_comparison(expr->operands[0], &op[0]);
- resolve_bool_comparison(expr->operands[1], &op[1]);
- }
-
- emit(IF(op[0], op[1],
- brw_conditional_for_comparison(expr->operation)));
- return;
-
- case ir_triop_csel: {
- /* Expand the boolean condition into the flag register. */
- fs_inst *inst = emit(MOV(reg_null_d, op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
- /* Select which boolean to use as the result. */
- fs_reg temp = vgrf(expr->operands[1]->type);
- inst = emit(SEL(temp, op[1], op[2]));
- inst->predicate = BRW_PREDICATE_NORMAL;
-
- emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ));
- return;
- }
-
- default:
- unreachable("not reached");
- }
- }
-
- ir->condition->accept(this);
- emit(IF(this->result, fs_reg(0), BRW_CONDITIONAL_NZ));
-}
-
-bool
-fs_visitor::try_opt_frontfacing_ternary(ir_if *ir)
-{
- ir_dereference_variable *deref = ir->condition->as_dereference_variable();
- if (!deref || strcmp(deref->var->name, "gl_FrontFacing") != 0)
- return false;
-
- if (ir->then_instructions.length() != 1 ||
- ir->else_instructions.length() != 1)
- return false;
-
- ir_assignment *then_assign =
- ((ir_instruction *)ir->then_instructions.head)->as_assignment();
- ir_assignment *else_assign =
- ((ir_instruction *)ir->else_instructions.head)->as_assignment();
-
- if (!then_assign || then_assign->condition ||
- !else_assign || else_assign->condition ||
- then_assign->write_mask != else_assign->write_mask ||
- !then_assign->lhs->equals(else_assign->lhs))
- return false;
-
- ir_constant *then_rhs = then_assign->rhs->as_constant();
- ir_constant *else_rhs = else_assign->rhs->as_constant();
-
- if (!then_rhs || !else_rhs)
- return false;
-
- if (then_rhs->type->base_type != GLSL_TYPE_FLOAT)
- return false;
-
- if ((then_rhs->is_one() && else_rhs->is_negative_one()) ||
- (else_rhs->is_one() && then_rhs->is_negative_one())) {
- then_assign->lhs->accept(this);
- fs_reg dst = this->result;
- dst.type = BRW_REGISTER_TYPE_D;
- fs_reg tmp = vgrf(glsl_type::int_type);
-
- if (devinfo->gen >= 6) {
- /* Bit 15 of g0.0 is 0 if the polygon is front facing. */
- fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W));
-
- /* For (gl_FrontFacing ? 1.0 : -1.0), emit:
- *
- * or(8) tmp.1<2>W g0.0<0,1,0>W 0x00003f80W
- * and(8) dst<1>D tmp<8,8,1>D 0xbf800000D
- *
- * and negate g0.0<0,1,0>W for (gl_FrontFacing ? -1.0 : 1.0).
- */
-
- if (then_rhs->is_negative_one()) {
- assert(else_rhs->is_one());
- g0.negate = true;
- }
-
- tmp.type = BRW_REGISTER_TYPE_W;
- tmp.subreg_offset = 2;
- tmp.stride = 2;
-
- fs_inst *or_inst = emit(OR(tmp, g0, fs_reg(0x3f80)));
- or_inst->src[1].type = BRW_REGISTER_TYPE_UW;
-
- tmp.type = BRW_REGISTER_TYPE_D;
- tmp.subreg_offset = 0;
- tmp.stride = 1;
- } else {
- /* Bit 31 of g1.6 is 0 if the polygon is front facing. */
- fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));
-
- /* For (gl_FrontFacing ? 1.0 : -1.0), emit:
- *
- * or(8) tmp<1>D g1.6<0,1,0>D 0x3f800000D
- * and(8) dst<1>D tmp<8,8,1>D 0xbf800000D
- *
- * and negate g1.6<0,1,0>D for (gl_FrontFacing ? -1.0 : 1.0).
- */
-
- if (then_rhs->is_negative_one()) {
- assert(else_rhs->is_one());
- g1_6.negate = true;
- }
-
- emit(OR(tmp, g1_6, fs_reg(0x3f800000)));
- }
- emit(AND(dst, tmp, fs_reg(0xbf800000)));
- return true;
- }
-
- return false;
-}
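/* Standalone sketch of the OR/AND select above, assuming IEEE-754 binary32
 * (front_facing_select() is a hypothetical helper, not driver code): OR
 * places the facing bit in the sign position of 1.0f's encoding, and the
 * final AND with 0xbf800000 keeps only the sign bit plus the exponent and
 * mantissa bits of 1.0, giving +1.0f or -1.0f without any flow control.
 */
#include <cstdint>
#include <cstring>

static float front_facing_select(bool front_facing)
{
   uint32_t bits = 0x3f800000u | (front_facing ? 0u : 0x80000000u);
   bits &= 0xbf800000u;                 /* the AND emitted in the code above */

   float f;
   std::memcpy(&f, &bits, sizeof f);
   return f;                            /* 1.0f if front facing, else -1.0f */
}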
-
/**
* Try to replace IF/MOV/ELSE/MOV/ENDIF with SEL.
*
@@ -3056,21 +1091,21 @@ fs_visitor::try_replace_with_sel()
if (src0.file == IMM) {
src0 = vgrf(glsl_type::float_type);
src0.type = then_mov->src[0].type;
- emit(MOV(src0, then_mov->src[0]));
+ bld.MOV(src0, then_mov->src[0]);
}
- fs_inst *sel;
if (if_inst->conditional_mod) {
/* Sandybridge-specific IF with embedded comparison */
- emit(CMP(reg_null_d, if_inst->src[0], if_inst->src[1],
- if_inst->conditional_mod));
- sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]);
- sel->predicate = BRW_PREDICATE_NORMAL;
+ bld.CMP(bld.null_reg_d(), if_inst->src[0], if_inst->src[1],
+ if_inst->conditional_mod);
+ set_predicate(BRW_PREDICATE_NORMAL,
+ bld.emit(BRW_OPCODE_SEL, then_mov->dst,
+ src0, else_mov->src[0]));
} else {
/* Separate CMP and IF instructions */
- sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]);
- sel->predicate = if_inst->predicate;
- sel->predicate_inverse = if_inst->predicate_inverse;
+ set_predicate_inv(if_inst->predicate, if_inst->predicate_inverse,
+ bld.emit(BRW_OPCODE_SEL, then_mov->dst,
+ src0, else_mov->src[0]));
}
return true;
@@ -3080,178 +1115,6 @@ fs_visitor::try_replace_with_sel()
}
void
-fs_visitor::visit(ir_if *ir)
-{
- if (try_opt_frontfacing_ternary(ir))
- return;
-
- /* Don't point the annotation at the if statement, because then it plus
- * the then and else blocks get printed.
- */
- this->base_ir = ir->condition;
-
- if (devinfo->gen == 6) {
- emit_if_gen6(ir);
- } else {
- emit_bool_to_cond_code(ir->condition);
-
- emit(IF(BRW_PREDICATE_NORMAL));
- }
-
- foreach_in_list(ir_instruction, ir_, &ir->then_instructions) {
- this->base_ir = ir_;
- ir_->accept(this);
- }
-
- if (!ir->else_instructions.is_empty()) {
- emit(BRW_OPCODE_ELSE);
-
- foreach_in_list(ir_instruction, ir_, &ir->else_instructions) {
- this->base_ir = ir_;
- ir_->accept(this);
- }
- }
-
- emit(BRW_OPCODE_ENDIF);
-
- if (!try_replace_with_sel() && devinfo->gen < 6) {
- no16("Can't support (non-uniform) control flow on SIMD16\n");
- }
-}
-
-void
-fs_visitor::visit(ir_loop *ir)
-{
- if (devinfo->gen < 6) {
- no16("Can't support (non-uniform) control flow on SIMD16\n");
- }
-
- this->base_ir = NULL;
- emit(BRW_OPCODE_DO);
-
- foreach_in_list(ir_instruction, ir_, &ir->body_instructions) {
- this->base_ir = ir_;
- ir_->accept(this);
- }
-
- this->base_ir = NULL;
- emit(BRW_OPCODE_WHILE);
-}
-
-void
-fs_visitor::visit(ir_loop_jump *ir)
-{
- switch (ir->mode) {
- case ir_loop_jump::jump_break:
- emit(BRW_OPCODE_BREAK);
- break;
- case ir_loop_jump::jump_continue:
- emit(BRW_OPCODE_CONTINUE);
- break;
- }
-}
-
-void
-fs_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
-{
- ir_dereference *deref = static_cast<ir_dereference *>(
- ir->actual_parameters.get_head());
- ir_variable *location = deref->variable_referenced();
- unsigned surf_index = (stage_prog_data->binding_table.abo_start +
- location->data.binding);
-
- /* Calculate the surface offset */
- fs_reg offset = vgrf(glsl_type::uint_type);
- ir_dereference_array *deref_array = deref->as_dereference_array();
-
- if (deref_array) {
- deref_array->array_index->accept(this);
-
- fs_reg tmp = vgrf(glsl_type::uint_type);
- emit(MUL(tmp, this->result, fs_reg(ATOMIC_COUNTER_SIZE)));
- emit(ADD(offset, tmp, fs_reg(location->data.atomic.offset)));
- } else {
- offset = fs_reg(location->data.atomic.offset);
- }
-
- /* Emit the appropriate machine instruction */
- const char *callee = ir->callee->function_name();
- ir->return_deref->accept(this);
- fs_reg dst = this->result;
-
- if (!strcmp("__intrinsic_atomic_read", callee)) {
- emit_untyped_surface_read(surf_index, dst, offset);
-
- } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
- emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
- fs_reg(), fs_reg());
-
- } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
- emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
- fs_reg(), fs_reg());
- }
-}
-
-void
-fs_visitor::visit(ir_call *ir)
-{
- const char *callee = ir->callee->function_name();
-
- if (!strcmp("__intrinsic_atomic_read", callee) ||
- !strcmp("__intrinsic_atomic_increment", callee) ||
- !strcmp("__intrinsic_atomic_predecrement", callee)) {
- visit_atomic_counter_intrinsic(ir);
- } else {
- unreachable("Unsupported intrinsic.");
- }
-}
-
-void
-fs_visitor::visit(ir_return *)
-{
- unreachable("FINISHME");
-}
-
-void
-fs_visitor::visit(ir_function *ir)
-{
- /* Ignore function bodies other than main() -- we shouldn't see calls to
- * them since they should all be inlined before we get to ir_to_mesa.
- */
- if (strcmp(ir->name, "main") == 0) {
- const ir_function_signature *sig;
- exec_list empty;
-
- sig = ir->matching_signature(NULL, &empty, false);
-
- assert(sig);
-
- foreach_in_list(ir_instruction, ir_, &sig->body) {
- this->base_ir = ir_;
- ir_->accept(this);
- }
- }
-}
-
-void
-fs_visitor::visit(ir_function_signature *)
-{
- unreachable("not reached");
-}
-
-void
-fs_visitor::visit(ir_emit_vertex *)
-{
- unreachable("not reached");
-}
-
-void
-fs_visitor::visit(ir_end_primitive *)
-{
- unreachable("not reached");
-}
-
-void
fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
fs_reg dst, fs_reg offset, fs_reg src0,
fs_reg src1)
@@ -3263,17 +1126,16 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
/* Initialize the sample mask in the message header. */
- emit(MOV(sources[0], fs_reg(0u)))
- ->force_writemask_all = true;
+ bld.exec_all().MOV(sources[0], fs_reg(0u));
if (stage == MESA_SHADER_FRAGMENT) {
if (((brw_wm_prog_data*)this->prog_data)->uses_kill) {
- emit(MOV(component(sources[0], 7), brw_flag_reg(0, 1)))
- ->force_writemask_all = true;
+ bld.exec_all()
+ .MOV(component(sources[0], 7), brw_flag_reg(0, 1));
} else {
- emit(MOV(component(sources[0], 7),
- retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)))
- ->force_writemask_all = true;
+ bld.exec_all()
+ .MOV(component(sources[0], 7),
+ retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD));
}
} else {
/* The execution mask is part of the side-band information sent together with
@@ -3282,37 +1144,37 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
* the atomic operation.
*/
assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_COMPUTE);
- emit(MOV(component(sources[0], 7),
- fs_reg(0xffffu)))->force_writemask_all = true;
+ bld.exec_all()
+ .MOV(component(sources[0], 7), fs_reg(0xffffu));
}
length++;
/* Set the atomic operation offset. */
sources[1] = vgrf(glsl_type::uint_type);
- emit(MOV(sources[1], offset));
+ bld.MOV(sources[1], offset);
length++;
/* Set the atomic operation arguments. */
if (src0.file != BAD_FILE) {
sources[length] = vgrf(glsl_type::uint_type);
- emit(MOV(sources[length], src0));
+ bld.MOV(sources[length], src0);
length++;
}
if (src1.file != BAD_FILE) {
sources[length] = vgrf(glsl_type::uint_type);
- emit(MOV(sources[length], src1));
+ bld.MOV(sources[length], src1);
length++;
}
int mlen = 1 + (length - 1) * reg_width;
fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
BRW_REGISTER_TYPE_UD, dispatch_width);
- emit(LOAD_PAYLOAD(src_payload, sources, length, 1));
+ bld.LOAD_PAYLOAD(src_payload, sources, length, 1);
/* Emit the instruction. */
- fs_inst *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_payload,
- fs_reg(surf_index), fs_reg(atomic_op));
+ fs_inst *inst = bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_payload,
+ fs_reg(surf_index), fs_reg(atomic_op));
inst->mlen = mlen;
}
@@ -3326,17 +1188,17 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
/* Initialize the sample mask in the message header. */
- emit(MOV(sources[0], fs_reg(0u)))
- ->force_writemask_all = true;
+ bld.exec_all()
+ .MOV(sources[0], fs_reg(0u));
if (stage == MESA_SHADER_FRAGMENT) {
if (((brw_wm_prog_data*)this->prog_data)->uses_kill) {
- emit(MOV(component(sources[0], 7), brw_flag_reg(0, 1)))
- ->force_writemask_all = true;
+ bld.exec_all()
+ .MOV(component(sources[0], 7), brw_flag_reg(0, 1));
} else {
- emit(MOV(component(sources[0], 7),
- retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)))
- ->force_writemask_all = true;
+ bld.exec_all()
+ .MOV(component(sources[0], 7),
+ retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD));
}
} else {
/* The execution mask is part of the side-band information sent together with
@@ -3345,48 +1207,25 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
* the atomic operation.
*/
assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_COMPUTE);
- emit(MOV(component(sources[0], 7),
- fs_reg(0xffffu)))->force_writemask_all = true;
+ bld.exec_all()
+ .MOV(component(sources[0], 7), fs_reg(0xffffu));
}
/* Set the surface read offset. */
sources[1] = vgrf(glsl_type::uint_type);
- emit(MOV(sources[1], offset));
+ bld.MOV(sources[1], offset);
int mlen = 1 + reg_width;
fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
BRW_REGISTER_TYPE_UD, dispatch_width);
- fs_inst *inst = emit(LOAD_PAYLOAD(src_payload, sources, 2, 1));
+ fs_inst *inst = bld.LOAD_PAYLOAD(src_payload, sources, 2, 1);
/* Emit the instruction. */
- inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, src_payload,
- fs_reg(surf_index), fs_reg(1));
+ inst = bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, src_payload,
+ fs_reg(surf_index), fs_reg(1));
inst->mlen = mlen;
}
-fs_inst *
-fs_visitor::emit(fs_inst *inst)
-{
- if (dispatch_width == 16 && inst->exec_size == 8)
- inst->force_uncompressed = true;
-
- inst->annotation = this->current_annotation;
- inst->ir = this->base_ir;
-
- this->instructions.push_tail(inst);
-
- return inst;
-}
-
-void
-fs_visitor::emit(exec_list list)
-{
- foreach_in_list_safe(fs_inst, inst, &list) {
- inst->exec_node::remove();
- emit(inst);
- }
-}
-
/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
@@ -3396,12 +1235,12 @@ fs_visitor::emit_dummy_fs()
/* Everyone's favorite color. */
const float color[4] = { 1.0, 0.0, 1.0, 0.0 };
for (int i = 0; i < 4; i++) {
- emit(MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F,
- dispatch_width), fs_reg(color[i])));
+ bld.MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F,
+ dispatch_width), fs_reg(color[i]));
}
fs_inst *write;
- write = emit(FS_OPCODE_FB_WRITE);
+ write = bld.emit(FS_OPCODE_FB_WRITE);
write->eot = true;
if (devinfo->gen >= 6) {
write->base_mrf = 2;
@@ -3454,19 +1293,19 @@ fs_visitor::emit_interpolation_setup_gen4()
{
struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
- this->current_annotation = "compute pixel centers";
+ fs_builder abld = bld.annotate("compute pixel centers");
this->pixel_x = vgrf(glsl_type::uint_type);
this->pixel_y = vgrf(glsl_type::uint_type);
this->pixel_x.type = BRW_REGISTER_TYPE_UW;
this->pixel_y.type = BRW_REGISTER_TYPE_UW;
- emit(ADD(this->pixel_x,
+ abld.ADD(this->pixel_x,
fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
- fs_reg(brw_imm_v(0x10101010))));
- emit(ADD(this->pixel_y,
+ fs_reg(brw_imm_v(0x10101010)));
+ abld.ADD(this->pixel_y,
fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
- fs_reg(brw_imm_v(0x11001100))));
+ fs_reg(brw_imm_v(0x11001100)));
- this->current_annotation = "compute pixel deltas from v0";
+ abld = bld.annotate("compute pixel deltas from v0");
this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
vgrf(glsl_type::vec2_type);
@@ -3475,27 +1314,27 @@ fs_visitor::emit_interpolation_setup_gen4()
const fs_reg ystart(negate(brw_vec1_grf(1, 1)));
if (devinfo->has_pln && dispatch_width == 16) {
- emit(ADD(half(offset(delta_xy, 0), 0), half(this->pixel_x, 0), xstart));
- emit(ADD(half(offset(delta_xy, 0), 1), half(this->pixel_y, 0), ystart));
- emit(ADD(half(offset(delta_xy, 1), 0), half(this->pixel_x, 1), xstart))
- ->force_sechalf = true;
- emit(ADD(half(offset(delta_xy, 1), 1), half(this->pixel_y, 1), ystart))
- ->force_sechalf = true;
+ for (unsigned i = 0; i < 2; i++) {
+ abld.half(i).ADD(half(offset(delta_xy, i), 0),
+ half(this->pixel_x, i), xstart);
+ abld.half(i).ADD(half(offset(delta_xy, i), 1),
+ half(this->pixel_y, i), ystart);
+ }
} else {
- emit(ADD(offset(delta_xy, 0), this->pixel_x, xstart));
- emit(ADD(offset(delta_xy, 1), this->pixel_y, ystart));
+ abld.ADD(offset(delta_xy, 0), this->pixel_x, xstart);
+ abld.ADD(offset(delta_xy, 1), this->pixel_y, ystart);
}
- this->current_annotation = "compute pos.w and 1/pos.w";
+ abld = bld.annotate("compute pos.w and 1/pos.w");
/* Compute wpos.w. It's always in our setup, since it's needed to
* interpolate the other attributes.
*/
this->wpos_w = vgrf(glsl_type::float_type);
- emit(FS_OPCODE_LINTERP, wpos_w, delta_xy, interp_reg(VARYING_SLOT_POS, 3));
+ abld.emit(FS_OPCODE_LINTERP, wpos_w, delta_xy,
+ interp_reg(VARYING_SLOT_POS, 3));
/* Compute the pixel 1/W value from wpos.w. */
this->pixel_w = vgrf(glsl_type::float_type);
- emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
- this->current_annotation = NULL;
+ abld.emit(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
}
/** Emits the interpolation for the varying inputs. */
@@ -3504,8 +1343,8 @@ fs_visitor::emit_interpolation_setup_gen6()
{
struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
- this->current_annotation = "compute pixel centers";
- if (brw->gen >= 8 || dispatch_width == 8) {
+ fs_builder abld = bld.annotate("compute pixel centers");
+ if (devinfo->gen >= 8 || dispatch_width == 8) {
/* The "Register Region Restrictions" page says for BDW (and newer,
* presumably):
*
@@ -3518,15 +1357,15 @@ fs_visitor::emit_interpolation_setup_gen6()
*/
fs_reg int_pixel_xy(GRF, alloc.allocate(dispatch_width / 8),
BRW_REGISTER_TYPE_UW, dispatch_width * 2);
- emit(ADD(int_pixel_xy,
+ abld.exec_all()
+ .ADD(int_pixel_xy,
fs_reg(stride(suboffset(g1_uw, 4), 1, 4, 0)),
- fs_reg(brw_imm_v(0x11001010))))
- ->force_writemask_all = true;
+ fs_reg(brw_imm_v(0x11001010)));
this->pixel_x = vgrf(glsl_type::float_type);
this->pixel_y = vgrf(glsl_type::float_type);
- emit(FS_OPCODE_PIXEL_X, this->pixel_x, int_pixel_xy);
- emit(FS_OPCODE_PIXEL_Y, this->pixel_y, int_pixel_xy);
+ abld.emit(FS_OPCODE_PIXEL_X, this->pixel_x, int_pixel_xy);
+ abld.emit(FS_OPCODE_PIXEL_Y, this->pixel_y, int_pixel_xy);
} else {
/* The "Register Region Restrictions" page says for SNB, IVB, HSW:
*
@@ -3540,12 +1379,12 @@ fs_visitor::emit_interpolation_setup_gen6()
fs_reg int_pixel_y = vgrf(glsl_type::uint_type);
int_pixel_x.type = BRW_REGISTER_TYPE_UW;
int_pixel_y.type = BRW_REGISTER_TYPE_UW;
- emit(ADD(int_pixel_x,
+ abld.ADD(int_pixel_x,
fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
- fs_reg(brw_imm_v(0x10101010))));
- emit(ADD(int_pixel_y,
+ fs_reg(brw_imm_v(0x10101010)));
+ abld.ADD(int_pixel_y,
fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
- fs_reg(brw_imm_v(0x11001100))));
+ fs_reg(brw_imm_v(0x11001100)));
/* As of gen6, we can no longer mix float and int sources. We have
* to turn the integer pixel centers into floats for their actual
@@ -3553,21 +1392,19 @@ fs_visitor::emit_interpolation_setup_gen6()
*/
this->pixel_x = vgrf(glsl_type::float_type);
this->pixel_y = vgrf(glsl_type::float_type);
- emit(MOV(this->pixel_x, int_pixel_x));
- emit(MOV(this->pixel_y, int_pixel_y));
+ abld.MOV(this->pixel_x, int_pixel_x);
+ abld.MOV(this->pixel_y, int_pixel_y);
}
- this->current_annotation = "compute pos.w";
+ abld = bld.annotate("compute pos.w");
this->pixel_w = fs_reg(brw_vec8_grf(payload.source_w_reg, 0));
this->wpos_w = vgrf(glsl_type::float_type);
- emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
+ abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
uint8_t reg = payload.barycentric_coord_reg[i];
this->delta_xy[i] = fs_reg(brw_vec16_grf(reg, 0));
}
-
- this->current_annotation = NULL;
}
void
@@ -3581,7 +1418,7 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
fs_reg tmp = vgrf(glsl_type::vec4_type);
assert(color.type == BRW_REGISTER_TYPE_F);
for (unsigned i = 0; i < components; i++) {
- inst = emit(MOV(offset(tmp, i), offset(color, i)));
+ inst = bld.MOV(offset(tmp, i), offset(color, i));
inst->saturate = true;
}
color = tmp;
@@ -3627,7 +1464,7 @@ fs_visitor::emit_alpha_test()
{
assert(stage == MESA_SHADER_FRAGMENT);
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- this->current_annotation = "Alpha test";
+ const fs_builder abld = bld.annotate("Alpha test");
fs_inst *cmp;
if (key->alpha_test_func == GL_ALWAYS)
@@ -3637,30 +1474,29 @@ fs_visitor::emit_alpha_test()
/* f0.1 = 0 */
fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
BRW_REGISTER_TYPE_UW));
- cmp = emit(CMP(reg_null_f, some_reg, some_reg,
- BRW_CONDITIONAL_NEQ));
+ cmp = abld.CMP(bld.null_reg_f(), some_reg, some_reg,
+ BRW_CONDITIONAL_NEQ);
} else {
/* RT0 alpha */
fs_reg color = offset(outputs[0], 3);
/* f0.1 &= func(color, ref) */
- cmp = emit(CMP(reg_null_f, color, fs_reg(key->alpha_test_ref),
- cond_for_alpha_func(key->alpha_test_func)));
+ cmp = abld.CMP(bld.null_reg_f(), color, fs_reg(key->alpha_test_ref),
+ cond_for_alpha_func(key->alpha_test_func));
}
cmp->predicate = BRW_PREDICATE_NORMAL;
cmp->flag_subreg = 1;
}
fs_inst *
-fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
+fs_visitor::emit_single_fb_write(const fs_builder &bld,
+ fs_reg color0, fs_reg color1,
fs_reg src0_alpha, unsigned components,
unsigned exec_size, bool use_2nd_half)
{
assert(stage == MESA_SHADER_FRAGMENT);
brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-
- this->current_annotation = "FB write header";
int header_size = 2, payload_header_size;
/* We can potentially have a message length of up to 15, so we have to set
@@ -3691,22 +1527,23 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
if (payload.aa_dest_stencil_reg) {
sources[length] = fs_reg(GRF, alloc.allocate(1));
- emit(MOV(sources[length],
- fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))));
+ bld.exec_all().annotate("FB write stencil/AA alpha")
+ .MOV(sources[length],
+ fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)));
length++;
}
prog_data->uses_omask =
prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
if (prog_data->uses_omask) {
- this->current_annotation = "FB write oMask";
assert(this->sample_mask.file != BAD_FILE);
/* Hand over gl_SampleMask. Only lower 16 bits are relevant. Since
       * it's unsigned single words, one vgrf is always 16-wide.
*/
sources[length] = fs_reg(GRF, alloc.allocate(1),
BRW_REGISTER_TYPE_UW, 16);
- emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask);
+ bld.exec_all().annotate("FB write oMask")
+ .emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask);
length++;
}
@@ -3752,7 +1589,11 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
/* Hand over gl_FragDepth. */
assert(this->frag_depth.file != BAD_FILE);
- sources[length] = this->frag_depth;
+ if (exec_size < dispatch_width) {
+ sources[length] = half(this->frag_depth, use_2nd_half);
+ } else {
+ sources[length] = this->frag_depth;
+ }
} else {
/* Pass through the payload depth. */
sources[length] = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
@@ -3763,28 +1604,29 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
if (payload.dest_depth_reg)
sources[length++] = fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0));
+ const fs_builder ubld = bld.group(exec_size, use_2nd_half);
fs_inst *load;
fs_inst *write;
if (devinfo->gen >= 7) {
/* Send from the GRF */
fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F, exec_size);
- load = emit(LOAD_PAYLOAD(payload, sources, length, payload_header_size));
+ load = ubld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
payload.reg = alloc.allocate(load->regs_written);
load->dst = payload;
- write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload);
+ write = ubld.emit(FS_OPCODE_FB_WRITE, reg_undef, payload);
write->base_mrf = -1;
} else {
/* Send from the MRF */
- load = emit(LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F, exec_size),
- sources, length, payload_header_size));
+ load = ubld.LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F, exec_size),
+ sources, length, payload_header_size);
/* On pre-SNB, we have to interlace the color values. LOAD_PAYLOAD
* will do this for us if we just give it a COMPR4 destination.
*/
- if (brw->gen < 6 && exec_size == 16)
+ if (devinfo->gen < 6 && exec_size == 16)
load->dst.reg |= BRW_MRF_COMPR4;
- write = emit(FS_OPCODE_FB_WRITE);
+ write = ubld.emit(FS_OPCODE_FB_WRITE);
write->exec_size = exec_size;
write->base_mrf = 1;
}
@@ -3807,10 +1649,10 @@ fs_visitor::emit_fb_writes()
fs_inst *inst = NULL;
if (do_dual_src) {
- this->current_annotation = ralloc_asprintf(this->mem_ctx,
- "FB dual-source write");
- inst = emit_single_fb_write(this->outputs[0], this->dual_src_output,
- reg_undef, 4, 8);
+ const fs_builder abld = bld.annotate("FB dual-source write");
+
+ inst = emit_single_fb_write(abld, this->outputs[0],
+ this->dual_src_output, reg_undef, 4, 8);
inst->target = 0;
/* SIMD16 dual source blending requires to send two SIMD8 dual source
@@ -3831,8 +1673,9 @@ fs_visitor::emit_fb_writes()
* m + 3: a1
*/
if (dispatch_width == 16) {
- inst = emit_single_fb_write(this->outputs[0], this->dual_src_output,
- reg_undef, 4, 8, true);
+ inst = emit_single_fb_write(abld, this->outputs[0],
+ this->dual_src_output, reg_undef, 4, 8,
+ true);
inst->target = 0;
}
@@ -3843,14 +1686,14 @@ fs_visitor::emit_fb_writes()
if (this->outputs[target].file == BAD_FILE)
continue;
- this->current_annotation = ralloc_asprintf(this->mem_ctx,
- "FB write target %d",
- target);
+ const fs_builder abld = bld.annotate(
+ ralloc_asprintf(this->mem_ctx, "FB write target %d", target));
+
fs_reg src0_alpha;
if (devinfo->gen >= 6 && key->replicate_alpha && target != 0)
src0_alpha = offset(outputs[0], 3);
- inst = emit_single_fb_write(this->outputs[target], reg_undef,
+ inst = emit_single_fb_write(abld, this->outputs[target], reg_undef,
src0_alpha,
this->output_components[target],
dispatch_width);
@@ -3863,19 +1706,17 @@ fs_visitor::emit_fb_writes()
* alpha out the pipeline to our null renderbuffer to support
* alpha-testing, alpha-to-coverage, and so on.
*/
- inst = emit_single_fb_write(reg_undef, reg_undef, reg_undef, 0,
+ inst = emit_single_fb_write(bld, reg_undef, reg_undef, reg_undef, 0,
dispatch_width);
inst->target = 0;
}
inst->eot = true;
- this->current_annotation = NULL;
}
void
-fs_visitor::setup_uniform_clipplane_values()
+fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
{
- gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
const struct brw_vue_prog_key *key =
(const struct brw_vue_prog_key *) this->key;
@@ -3889,7 +1730,7 @@ fs_visitor::setup_uniform_clipplane_values()
}
}
-void fs_visitor::compute_clip_distance()
+void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
{
struct brw_vue_prog_data *vue_prog_data =
(struct brw_vue_prog_data *) prog_data;
@@ -3918,9 +1759,9 @@ void fs_visitor::compute_clip_distance()
if (outputs[clip_vertex].file == BAD_FILE)
return;
- setup_uniform_clipplane_values();
+ setup_uniform_clipplane_values(clip_planes);
- current_annotation = "user clip distances";
+ const fs_builder abld = bld.annotate("user clip distances");
this->outputs[VARYING_SLOT_CLIP_DIST0] = vgrf(glsl_type::vec4_type);
this->outputs[VARYING_SLOT_CLIP_DIST1] = vgrf(glsl_type::vec4_type);
@@ -3930,16 +1771,16 @@ void fs_visitor::compute_clip_distance()
fs_reg output = outputs[VARYING_SLOT_CLIP_DIST0 + i / 4];
output.reg_offset = i & 3;
- emit(MUL(output, outputs[clip_vertex], u));
+ abld.MUL(output, outputs[clip_vertex], u);
for (int j = 1; j < 4; j++) {
u.reg = userplane[i].reg + j;
- emit(MAD(output, output, offset(outputs[clip_vertex], j), u));
+ abld.MAD(output, output, offset(outputs[clip_vertex], j), u);
}
}
}
void
-fs_visitor::emit_urb_writes()
+fs_visitor::emit_urb_writes(gl_clip_plane *clip_planes)
{
int slot, urb_offset, length;
struct brw_vs_prog_data *vs_prog_data =
@@ -3954,18 +1795,17 @@ fs_visitor::emit_urb_writes()
/* Lower legacy ff and ClipVertex clipping to clip distances */
if (key->base.userclip_active && !prog->UsesClipDistanceOut)
- compute_clip_distance();
+ compute_clip_distance(clip_planes);
/* If we don't have any valid slots to write, just do a minimal urb write
* send to terminate the shader. */
if (vue_map->slots_valid == 0) {
fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
- fs_inst *inst = emit(MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0),
- BRW_REGISTER_TYPE_UD))));
- inst->force_writemask_all = true;
+ bld.exec_all().MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0),
+ BRW_REGISTER_TYPE_UD)));
- inst = emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
+ fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
inst->eot = true;
inst->mlen = 1;
inst->offset = 1;
@@ -3994,7 +1834,7 @@ fs_visitor::emit_urb_writes()
}
zero = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
- emit(MOV(zero, fs_reg(0u)));
+ bld.MOV(zero, fs_reg(0u));
sources[length++] = zero;
if (vue_map->slots_valid & VARYING_BIT_LAYER)
@@ -4049,8 +1889,7 @@ fs_visitor::emit_urb_writes()
for (int i = 0; i < 4; i++) {
reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type);
src = offset(this->outputs[varying], i);
- fs_inst *inst = emit(MOV(reg, src));
- inst->saturate = true;
+ set_saturate(true, bld.MOV(reg, src));
sources[length++] = reg;
}
} else {
@@ -4060,7 +1899,7 @@ fs_visitor::emit_urb_writes()
break;
}
- current_annotation = "URB write";
+ const fs_builder abld = bld.annotate("URB write");
/* If we've queued up 8 registers of payload (2 VUE slots), if this is
* the last slot or if we need to flush (see BAD_FILE varying case
@@ -4073,22 +1912,14 @@ fs_visitor::emit_urb_writes()
fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1);
fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1),
BRW_REGISTER_TYPE_F, dispatch_width);
-
- /* We need WE_all on the MOV for the message header (the URB handles)
- * so do a MOV to a dummy register and set force_writemask_all on the
- * MOV. LOAD_PAYLOAD will preserve that.
- */
- fs_reg dummy = fs_reg(GRF, alloc.allocate(1),
- BRW_REGISTER_TYPE_UD);
- fs_inst *inst = emit(MOV(dummy, fs_reg(retype(brw_vec8_grf(1, 0),
- BRW_REGISTER_TYPE_UD))));
- inst->force_writemask_all = true;
- payload_sources[0] = dummy;
+ payload_sources[0] =
+ fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
memcpy(&payload_sources[1], sources, length * sizeof sources[0]);
- emit(LOAD_PAYLOAD(payload, payload_sources, length + 1, 1));
+ abld.LOAD_PAYLOAD(payload, payload_sources, length + 1, 1);
- inst = emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
+ fs_inst *inst =
+ abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
inst->eot = last;
inst->mlen = length + 1;
inst->offset = urb_offset;
@@ -4100,21 +1931,9 @@ fs_visitor::emit_urb_writes()
}
void
-fs_visitor::resolve_ud_negate(fs_reg *reg)
-{
- if (reg->type != BRW_REGISTER_TYPE_UD ||
- !reg->negate)
- return;
-
- fs_reg temp = vgrf(glsl_type::uint_type);
- emit(MOV(temp, *reg));
- *reg = temp;
-}
-
-void
fs_visitor::emit_cs_terminate()
{
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
/* We are getting the thread ID from the compute shader header */
assert(stage == MESA_SHADER_COMPUTE);
@@ -4125,94 +1944,53 @@ fs_visitor::emit_cs_terminate()
*/
struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
- fs_inst *inst = emit(MOV(payload, g0));
- inst->force_writemask_all = true;
+ bld.exec_all().MOV(payload, g0);
/* Send a message to the thread spawner to terminate the thread. */
- inst = emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload);
+ fs_inst *inst = bld.exec_all()
+ .emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload);
inst->eot = true;
}
-/**
- * Resolve the result of a Gen4-5 CMP instruction to a proper boolean.
- *
- * CMP on Gen4-5 only sets the LSB of the result; the rest are undefined.
- * If we need a proper boolean value, we have to fix it up to be 0 or ~0.
- */
void
-fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg)
+fs_visitor::emit_barrier()
{
- assert(devinfo->gen <= 5);
+ assert(devinfo->gen >= 7);
- if (rvalue->type != glsl_type::bool_type)
- return;
+ /* We are getting the barrier ID from the compute shader header */
+ assert(stage == MESA_SHADER_COMPUTE);
- fs_reg and_result = vgrf(glsl_type::bool_type);
- fs_reg neg_result = vgrf(glsl_type::bool_type);
- emit(AND(and_result, *reg, fs_reg(1)));
- emit(MOV(neg_result, negate(and_result)));
- *reg = neg_result;
-}
+ fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
-fs_visitor::fs_visitor(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_wm_prog_key *key,
- struct brw_wm_prog_data *prog_data,
- struct gl_shader_program *shader_prog,
- struct gl_fragment_program *fp,
- unsigned dispatch_width)
- : backend_visitor(brw, shader_prog, &fp->Base, &prog_data->base,
- MESA_SHADER_FRAGMENT),
- reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)),
- reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)),
- reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)),
- key(key), prog_data(&prog_data->base),
- dispatch_width(dispatch_width), promoted_constants(0)
-{
- this->mem_ctx = mem_ctx;
- init();
-}
+ /* Clear the message payload */
+ bld.exec_all().MOV(payload, fs_reg(0u));
-fs_visitor::fs_visitor(struct brw_context *brw,
- void *mem_ctx,
- const struct brw_vs_prog_key *key,
- struct brw_vs_prog_data *prog_data,
- struct gl_shader_program *shader_prog,
- struct gl_vertex_program *cp,
- unsigned dispatch_width)
- : backend_visitor(brw, shader_prog, &cp->Base, &prog_data->base.base,
- MESA_SHADER_VERTEX),
- reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)),
- reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)),
- reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)),
- key(key), prog_data(&prog_data->base.base),
- dispatch_width(dispatch_width), promoted_constants(0)
-{
- this->mem_ctx = mem_ctx;
- init();
+ /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */
+ fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD));
+ bld.exec_all().AND(component(payload, 2), r0_2, fs_reg(0x0f000000u));
+
+ /* Emit a gateway "barrier" message using the payload we set up, followed
+ * by a wait instruction.
+ */
+ bld.exec_all().emit(SHADER_OPCODE_BARRIER, reg_undef, payload);
}
-fs_visitor::fs_visitor(struct brw_context *brw,
+fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
- const struct brw_cs_prog_key *key,
- struct brw_cs_prog_data *prog_data,
+ gl_shader_stage stage,
+ const void *key,
+ struct brw_stage_prog_data *prog_data,
struct gl_shader_program *shader_prog,
- struct gl_compute_program *cp,
- unsigned dispatch_width)
- : backend_visitor(brw, shader_prog, &cp->Base, &prog_data->base,
- MESA_SHADER_COMPUTE),
- reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)),
- reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)),
- reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)),
- key(key), prog_data(&prog_data->base),
- dispatch_width(dispatch_width)
-{
- this->mem_ctx = mem_ctx;
- init();
-}
-
-void
-fs_visitor::init()
+ struct gl_program *prog,
+ unsigned dispatch_width,
+ int shader_time_index)
+ : backend_shader(compiler, log_data, mem_ctx,
+ shader_prog, prog, prog_data, stage),
+ key(key), prog_data(prog_data),
+ dispatch_width(dispatch_width),
+ shader_time_index(shader_time_index),
+ promoted_constants(0),
+ bld(fs_builder(this, dispatch_width).at_end())
{
switch (stage) {
case MESA_SHADER_FRAGMENT:
@@ -4232,9 +2010,6 @@ fs_visitor::init()
this->failed = false;
this->simd16_unsupported = false;
this->no16_msg = NULL;
- this->variable_ht = hash_table_ctor(0,
- hash_table_pointer_hash,
- hash_table_pointer_compare);
this->nir_locals = NULL;
this->nir_globals = NULL;
@@ -4247,9 +2022,6 @@ fs_visitor::init()
this->first_non_payload_grf = 0;
this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
- this->current_annotation = NULL;
- this->base_ir = NULL;
-
this->virtual_grf_start = NULL;
this->virtual_grf_end = NULL;
this->live_intervals = NULL;
@@ -4269,5 +2041,4 @@ fs_visitor::init()
fs_visitor::~fs_visitor()
{
- hash_table_dtor(this->variable_ht);
}
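
The bulk of the brw_fs_visitor.cpp changes above replace fs_visitor's private emit()/annotation plumbing with the fs_builder interface ("bld"), whose exec_all(), annotate(), group() and half() modifiers subsume force_writemask_all, current_annotation and force_sechalf. The new emit_barrier() also builds the gateway message payload by ANDing r0.2 with 0x0f000000 so that only bits 27:24, the barrier ID, survive. A minimal stand-alone sketch of that masking step follows; the helper name and test value are invented for illustration and are not part of the driver.

#include <cassert>
#include <cstdint>

/* Hypothetical stand-in for the AND in emit_barrier(): keep only bits 27:24
 * of r0.2 (the barrier ID field), in place, for payload register 2. */
static uint32_t keep_barrier_id_bits(uint32_t r0_2)
{
   return r0_2 & 0x0f000000u;
}

int main()
{
   const uint32_t r0_2 = 0xdeadbeefu;            /* arbitrary example value */
   const uint32_t payload2 = keep_barrier_id_bits(r0_2);
   assert(payload2 == 0x0e000000u);              /* only bits 27:24 survive */
   assert((payload2 >> 24) == ((r0_2 >> 24) & 0xfu));
   return 0;
}
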
diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
index a323e4d9031..0b8bfc3d9bd 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
@@ -47,11 +47,12 @@ brw_upload_gs_pull_constants(struct brw_context *brw)
return;
/* BRW_NEW_GS_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = &brw->gs.prog_data->base.base;
+ const struct brw_vue_prog_data *prog_data = &brw->gs.prog_data->base;
+ const bool dword_pitch = prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;
/* _NEW_PROGRAM_CONSTANTS */
brw_upload_pull_constants(brw, BRW_NEW_GS_CONSTBUF, &gp->program.Base,
- stage_state, prog_data, false);
+ stage_state, &prog_data->base, dword_pitch);
}
const struct brw_tracked_state brw_gs_pull_constants = {
@@ -77,8 +78,11 @@ brw_upload_gs_ubo_surfaces(struct brw_context *brw)
return;
/* BRW_NEW_GS_PROG_DATA */
+ struct brw_vue_prog_data *prog_data = &brw->gs.prog_data->base;
+ bool dword_pitch = prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;
+
brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
- &brw->gs.base, &brw->gs.prog_data->base.base, false);
+ &brw->gs.base, &prog_data->base, dword_pitch);
}
const struct brw_tracked_state brw_gs_ubo_surfaces = {
diff --git a/src/mesa/drivers/dri/i965/brw_inst.h b/src/mesa/drivers/dri/i965/brw_inst.h
index e347c518348..7a8c210118c 100644
--- a/src/mesa/drivers/dri/i965/brw_inst.h
+++ b/src/mesa/drivers/dri/i965/brw_inst.h
@@ -322,6 +322,9 @@ FJ(gen4_jump_count, 111, 96, devinfo->gen < 6)
FC(gen4_pop_count, 115, 112, devinfo->gen < 6)
/** @} */
+/* Message descriptor bits */
+#define MD(x) ((x) + 96)
+
/**
* Fields for SEND messages:
* @{
@@ -347,6 +350,7 @@ FF(header_present,
/* 6: */ 115, 115,
/* 7: */ 115, 115,
/* 8: */ 115, 115)
+F(gateway_notify, MD(16), MD(15))
FF(function_control,
/* 4: */ 111, 96,
/* 4.5: */ 111, 96,
@@ -354,6 +358,13 @@ FF(function_control,
/* 6: */ 114, 96,
/* 7: */ 114, 96,
/* 8: */ 114, 96)
+FF(gateway_subfuncid,
+ /* 4: */ MD(1), MD(0),
+ /* 4.5: */ MD(1), MD(0),
+ /* 5: */ MD(1), MD(0), /* 2:0, but bit 2 is reserved MBZ */
+ /* 6: */ MD(2), MD(0),
+ /* 7: */ MD(2), MD(0),
+ /* 8: */ MD(2), MD(0))
FF(sfid,
/* 4: */ 123, 120, /* called msg_target */
/* 4.5 */ 123, 120,
@@ -364,9 +375,6 @@ FF(sfid,
FC(base_mrf, 27, 24, devinfo->gen < 6);
/** @} */
-/* Message descriptor bits */
-#define MD(x) (x + 96)
-
/**
* URB message function control bits:
* @{
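
The brw_inst.h change moves the MD() macro above its first use and parenthesizes its argument; MD(x) maps message-descriptor bit x onto bit x + 96 of the 128-bit instruction, i.e. into the fourth dword, which is where the new gateway_notify and gateway_subfuncid fields live. A small self-contained sketch of that mapping, assuming the instruction is held as four little-endian dwords; the get_bits() helper here is invented for illustration and is not the FF()/FC() machinery used above.

#include <cassert>
#include <cstdint>

#define MD(x) ((x) + 96)   /* message-descriptor bit x -> instruction bit x + 96 */

/* Hypothetical helper: read instruction bits [high:low], with the instruction
 * stored as four little-endian dwords and the field contained in one dword. */
static uint32_t get_bits(const uint32_t inst[4], unsigned high, unsigned low)
{
   assert(high >= low && high / 32 == low / 32);
   const unsigned width = high - low + 1;
   const uint32_t mask = width < 32 ? (1u << width) - 1u : 0xffffffffu;
   return (inst[low / 32] >> (low % 32)) & mask;
}

int main()
{
   uint32_t inst[4] = { 0, 0, 0, 0 };
   inst[3] = 0x5u;   /* descriptor bits 2:0 = 0b101, i.e. instruction bits 98:96 */
   /* gateway_subfuncid on Gen6+ spans MD(2):MD(0). */
   assert(get_bits(inst, MD(2), MD(0)) == 0x5u);
   return 0;
}
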
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index f3dfe790f34..96dc20da3cf 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -131,14 +131,15 @@ horiz_offset(fs_reg reg, unsigned delta)
static inline fs_reg
offset(fs_reg reg, unsigned delta)
{
- assert(reg.stride > 0);
switch (reg.file) {
case BAD_FILE:
break;
case GRF:
case MRF:
case ATTR:
- return byte_offset(reg, delta * reg.width * reg.stride * type_sz(reg.type));
+ return byte_offset(reg,
+ delta * MAX2(reg.width * reg.stride, 1) *
+ type_sz(reg.type));
case UNIFORM:
reg.reg_offset += delta;
break;
@@ -155,6 +156,7 @@ component(fs_reg reg, unsigned idx)
assert(idx < reg.width);
reg.subreg_offset = idx * type_sz(reg.type);
reg.width = 1;
+ reg.stride = 0;
return reg;
}
@@ -254,9 +256,62 @@ public:
uint8_t exec_size;
bool eot:1;
- bool force_uncompressed:1;
bool force_sechalf:1;
bool pi_noperspective:1; /**< Pixel interpolator noperspective flag */
};
+/**
+ * Set second-half quarter control on \p inst.
+ */
+static inline fs_inst *
+set_sechalf(fs_inst *inst)
+{
+ inst->force_sechalf = true;
+ return inst;
+}
+
+/**
+ * Make the execution of \p inst dependent on the evaluation of a possibly
+ * inverted predicate.
+ */
+static inline fs_inst *
+set_predicate_inv(enum brw_predicate pred, bool inverse,
+ fs_inst *inst)
+{
+ inst->predicate = pred;
+ inst->predicate_inverse = inverse;
+ return inst;
+}
+
+/**
+ * Make the execution of \p inst dependent on the evaluation of a predicate.
+ */
+static inline fs_inst *
+set_predicate(enum brw_predicate pred, fs_inst *inst)
+{
+ return set_predicate_inv(pred, false, inst);
+}
+
+/**
+ * Write the result of evaluating the condition given by \p mod to a flag
+ * register.
+ */
+static inline fs_inst *
+set_condmod(enum brw_conditional_mod mod, fs_inst *inst)
+{
+ inst->conditional_mod = mod;
+ return inst;
+}
+
+/**
+ * Clamp the result of \p inst to the saturation range of its destination
+ * datatype.
+ */
+static inline fs_inst *
+set_saturate(bool saturate, fs_inst *inst)
+{
+ inst->saturate = saturate;
+ return inst;
+}
+
#endif
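
All of the new helpers above take the freshly emitted fs_inst pointer and return it, which is what lets callers wrap a builder emit in place, as in the set_predicate(BRW_PREDICATE_NORMAL, bld.emit(BRW_OPCODE_SEL, ...)) and set_saturate(true, bld.MOV(...)) calls earlier in this diff. A tiny stand-alone toy, not the Mesa API, showing why returning the instruction pointer makes that chaining work:

#include <cassert>

struct toy_inst {            /* stand-in for fs_inst */
   int opcode;
   int predicate;
   bool saturate;
};

/* Mirror the pattern of the helpers above: modify the instruction and hand
 * the same pointer back so calls can nest around an emit. */
static toy_inst *set_predicate(int pred, toy_inst *inst)
{
   inst->predicate = pred;
   return inst;
}

static toy_inst *set_saturate(bool sat, toy_inst *inst)
{
   inst->saturate = sat;
   return inst;
}

struct toy_builder {         /* stand-in for fs_builder */
   toy_inst pool[16];
   int count = 0;

   toy_inst *emit(int opcode)
   {
      toy_inst *inst = &pool[count++];
      inst->opcode = opcode;
      inst->predicate = 0;
      inst->saturate = false;
      return inst;
   }
};

int main()
{
   toy_builder bld;
   /* Same shape as set_saturate(true, set_predicate(PRED, bld.emit(SEL, ...))). */
   toy_inst *sel = set_saturate(true, set_predicate(1, bld.emit(42)));
   assert(sel == &bld.pool[0] && sel->opcode == 42);
   assert(sel->predicate == 1 && sel->saturate);
   return 0;
}
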
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index a56fdd6fce9..fceacae0e51 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -190,6 +190,50 @@ public:
}
};
+/**
+ * Make the execution of \p inst dependent on the evaluation of a possibly
+ * inverted predicate.
+ */
+inline vec4_instruction *
+set_predicate_inv(enum brw_predicate pred, bool inverse,
+ vec4_instruction *inst)
+{
+ inst->predicate = pred;
+ inst->predicate_inverse = inverse;
+ return inst;
+}
+
+/**
+ * Make the execution of \p inst dependent on the evaluation of a predicate.
+ */
+inline vec4_instruction *
+set_predicate(enum brw_predicate pred, vec4_instruction *inst)
+{
+ return set_predicate_inv(pred, false, inst);
+}
+
+/**
+ * Write the result of evaluating the condition given by \p mod to a flag
+ * register.
+ */
+inline vec4_instruction *
+set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst)
+{
+ inst->conditional_mod = mod;
+ return inst;
+}
+
+/**
+ * Clamp the result of \p inst to the saturation range of its destination
+ * datatype.
+ */
+inline vec4_instruction *
+set_saturate(bool saturate, vec4_instruction *inst)
+{
+ inst->saturate = saturate;
+ return inst;
+}
+
} /* namespace brw */
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
index 0424003ffd5..7a5f9834423 100644
--- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
+++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
@@ -89,19 +89,18 @@ txs_type(const glsl_type *type)
ir_visitor_status
lower_texture_grad_visitor::visit_leave(ir_texture *ir)
{
- /* Only lower textureGrad with shadow samplers */
- if (ir->op != ir_txd || !ir->shadow_comparitor)
+ /* Only lower textureGrad with cube maps or shadow samplers */
+ if (ir->op != ir_txd ||
+ (ir->sampler->type->sampler_dimensionality != GLSL_SAMPLER_DIM_CUBE &&
+ !ir->shadow_comparitor))
return visit_continue;
- /* Lower textureGrad() with samplerCubeShadow even if we have the sample_d_c
+ /* Lower textureGrad() with samplerCube* even if we have the sample_d_c
* message. GLSL provides gradients for the 'r' coordinate. Unfortunately:
*
* From the Ivybridge PRM, Volume 4, Part 1, sample_d message description:
* "The r coordinate contains the faceid, and the r gradients are ignored
* by hardware."
- *
- * We likely need to do a similar treatment for samplerCube and
- * samplerCubeArray, but we have insufficient testing for that at the moment.
*/
bool need_lowering = !has_sample_d_c ||
ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE;
@@ -155,9 +154,20 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir)
expr(ir_unop_sqrt, dot(dPdy, dPdy)));
}
- /* lambda_base = log2(rho). We're ignoring GL state biases for now. */
+ /* lambda_base = log2(rho). We're ignoring GL state biases for now.
+ *
+    * For cube maps these formulas give a value of rho that is twice what we
+    * should use, so divide it by 2 or, equivalently, subtract 1 from the
+    * result of the log2 computation.
+ */
ir->op = ir_txl;
- ir->lod_info.lod = expr(ir_unop_log2, rho);
+ if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
+ ir->lod_info.lod = expr(ir_binop_add,
+ expr(ir_unop_log2, rho),
+ new(mem_ctx) ir_constant(-1.0f));
+ } else {
+ ir->lod_info.lod = expr(ir_unop_log2, rho);
+ }
progress = true;
return visit_continue;
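
The cube-map branch added above compensates for rho coming out twice as large as it should by emitting log2(rho) + (-1), which is equivalent to log2(rho / 2). A short self-contained check of that identity in plain C++ arithmetic (not the GLSL IR builders used above):

#include <cassert>
#include <cmath>

int main()
{
   /* log2(rho) - 1 == log2(rho / 2) for any positive rho, so subtracting one
    * unit from the log2 result is the same as halving rho first. */
   for (float rho = 0.25f; rho < 64.0f; rho *= 1.5f)
      assert(std::fabs((std::log2(rho) - 1.0f) - std::log2(rho / 2.0f)) < 1e-5f);
   return 0;
}
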
diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index 06916e28cbd..49f2e3e498c 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -339,8 +339,13 @@ is_color_fast_clear_compatible(struct brw_context *brw,
mesa_format format,
const union gl_color_union *color)
{
- if (_mesa_is_format_integer_color(format))
+ if (_mesa_is_format_integer_color(format)) {
+ if (brw->gen >= 8) {
+ perf_debug("Integer fast clear not enabled for (%s)",
+ _mesa_get_format_name(format));
+ }
return false;
+ }
for (int i = 0; i < 4; i++) {
if (color->f[i] != 0.0 && color->f[i] != 1.0 &&
@@ -466,7 +471,8 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
* linear (untiled) memory is UNDEFINED."
*/
if (irb->mt->tiling == I915_TILING_NONE) {
- perf_debug("falling back to plain clear because buffers are untiled\n");
+ perf_debug("Falling back to plain clear because %dx%d buffer is untiled\n",
+ irb->mt->logical_width0, irb->mt->logical_height0);
clear_type = PLAIN_CLEAR;
}
@@ -477,7 +483,8 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
for (int i = 0; i < 4; i++) {
if (_mesa_format_has_color_component(irb->mt->format, i) &&
!color_mask[i]) {
- perf_debug("falling back to plain clear because of color mask\n");
+ perf_debug("Falling back to plain clear on %dx%d buffer because of color mask\n",
+ irb->mt->logical_width0, irb->mt->logical_height0);
clear_type = PLAIN_CLEAR;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
index fc7018d15b9..d079197a2a9 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
@@ -414,6 +414,12 @@ brw_meta_stencil_blit(struct brw_context *brw,
GLenum target;
_mesa_meta_fb_tex_blit_begin(ctx, &blit);
+ /* XXX: Pretend to support stencil textures so _mesa_base_tex_format()
+ * returns a valid format. When we properly support the extension, we
+ * should remove this.
+ */
+ assert(ctx->Extensions.ARB_texture_stencil8 == false);
+ ctx->Extensions.ARB_texture_stencil8 = true;
_mesa_GenFramebuffers(1, &fbo);
/* Force the surface to be configured for level zero. */
@@ -451,6 +457,7 @@ brw_meta_stencil_blit(struct brw_context *brw,
_mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
error:
+ ctx->Extensions.ARB_texture_stencil8 = false;
_mesa_meta_fb_tex_blit_end(ctx, target, &blit);
_mesa_meta_end(ctx);
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 67a693b5ec1..5a4515b582d 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -39,6 +39,7 @@
#include "brw_state.h"
#include "brw_defines.h"
+#include "main/framebuffer.h"
#include "main/fbobject.h"
#include "main/glformats.h"
@@ -46,12 +47,14 @@
static void upload_drawing_rect(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
+ const struct gl_framebuffer *fb = ctx->DrawBuffer;
+ const unsigned int fb_width = _mesa_geometric_width(fb);
+ const unsigned int fb_height = _mesa_geometric_height(fb);
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
OUT_BATCH(0); /* xmin, ymin */
- OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
- ((ctx->DrawBuffer->Height - 1) << 16));
+ OUT_BATCH(((fb_width - 1) & 0xffff) | ((fb_height - 1) << 16));
OUT_BATCH(0);
ADVANCE_BATCH();
}
@@ -767,7 +770,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
* works just fine, and there's no window system to worry about.
*/
if (_mesa_is_winsys_fbo(ctx->DrawBuffer))
- OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
+ OUT_BATCH((32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31);
else
OUT_BATCH(0);
ADVANCE_BATCH();
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index e4119b1aa3f..b7bb2315b97 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -122,18 +122,9 @@ brw_create_nir(struct brw_context *brw,
/* Get rid of split copies */
nir_optimize(nir);
- if (shader_prog) {
- nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms,
- &nir->num_direct_uniforms,
- &nir->num_uniforms);
- } else {
- /* ARB programs generally create a giant array of "uniform" data, and allow
- * indirect addressing without any boundaries. In the absence of bounds
- * analysis, it's all or nothing. num_direct_uniforms is only useful when
- * we have some direct and some indirect access; it doesn't matter here.
- */
- nir->num_direct_uniforms = 0;
- }
+ nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms,
+ &nir->num_direct_uniforms,
+ &nir->num_uniforms);
nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs);
nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs);
@@ -176,6 +167,12 @@ brw_create_nir(struct brw_context *brw,
nir_validate_shader(nir);
if (unlikely(debug_enabled)) {
+ /* Re-index SSA defs so we print more sensible numbers. */
+ nir_foreach_overload(nir, overload) {
+ if (overload->impl)
+ nir_index_ssa_defs(overload->impl);
+ }
+
fprintf(stderr, "NIR (SSA form) for %s shader:\n",
_mesa_shader_stage_to_string(stage));
nir_print_shader(nir, stderr);
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index b056fbfc427..ea128ccb670 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -88,7 +88,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
return NULL;
}
- case MESA_GEOMETRY_PROGRAM: {
+ case GL_GEOMETRY_PROGRAM_NV: {
struct brw_geometry_program *prog = CALLOC_STRUCT(brw_geometry_program);
if (prog) {
prog->id = get_new_program_id(brw->intelScreen);
@@ -287,18 +287,24 @@ void brwInitFragProgFuncs( struct dd_function_table *functions )
functions->MemoryBarrier = brw_memory_barrier;
}
+struct shader_times {
+ uint64_t time;
+ uint64_t written;
+ uint64_t reset;
+};
+
void
brw_init_shader_time(struct brw_context *brw)
{
- const int max_entries = 4096;
- brw->shader_time.bo = drm_intel_bo_alloc(brw->bufmgr, "shader time",
- max_entries * SHADER_TIME_STRIDE,
- 4096);
+ const int max_entries = 2048;
+ brw->shader_time.bo =
+ drm_intel_bo_alloc(brw->bufmgr, "shader time",
+ max_entries * SHADER_TIME_STRIDE * 3, 4096);
brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
max_entries);
- brw->shader_time.cumulative = rzalloc_array(brw, uint64_t,
+ brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
max_entries);
brw->shader_time.max_entries = max_entries;
}
@@ -319,27 +325,6 @@ compare_time(const void *a, const void *b)
}
static void
-get_written_and_reset(struct brw_context *brw, int i,
- uint64_t *written, uint64_t *reset)
-{
- enum shader_time_shader_type type = brw->shader_time.types[i];
- assert(type == ST_VS || type == ST_GS || type == ST_FS8 ||
- type == ST_FS16 || type == ST_CS);
-
- /* Find where we recorded written and reset. */
- int wi, ri;
-
- for (wi = i; brw->shader_time.types[wi] != type + 1; wi++)
- ;
-
- for (ri = i; brw->shader_time.types[ri] != type + 2; ri++)
- ;
-
- *written = brw->shader_time.cumulative[wi];
- *reset = brw->shader_time.cumulative[ri];
-}
-
-static void
print_shader_time_line(const char *stage, const char *name,
int shader_num, uint64_t time, uint64_t total)
{
@@ -374,26 +359,13 @@ brw_report_shader_time(struct brw_context *brw)
sorted[i] = &scaled[i];
switch (type) {
- case ST_VS_WRITTEN:
- case ST_VS_RESET:
- case ST_GS_WRITTEN:
- case ST_GS_RESET:
- case ST_FS8_WRITTEN:
- case ST_FS8_RESET:
- case ST_FS16_WRITTEN:
- case ST_FS16_RESET:
- case ST_CS_WRITTEN:
- case ST_CS_RESET:
- /* We'll handle these when along with the time. */
- scaled[i] = 0;
- continue;
-
case ST_VS:
case ST_GS:
case ST_FS8:
case ST_FS16:
case ST_CS:
- get_written_and_reset(brw, i, &written, &reset);
+ written = brw->shader_time.cumulative[i].written;
+ reset = brw->shader_time.cumulative[i].reset;
break;
default:
@@ -405,7 +377,7 @@ brw_report_shader_time(struct brw_context *brw)
break;
}
- uint64_t time = brw->shader_time.cumulative[i];
+ uint64_t time = brw->shader_time.cumulative[i].time;
if (written) {
scaled[i] = time / written * (written + reset);
} else {
@@ -491,16 +463,19 @@ brw_collect_shader_time(struct brw_context *brw)
* overhead compared to the cost of tracking the time in the first place.
*/
drm_intel_bo_map(brw->shader_time.bo, true);
-
- uint32_t *times = brw->shader_time.bo->virtual;
+ void *bo_map = brw->shader_time.bo->virtual;
for (int i = 0; i < brw->shader_time.num_entries; i++) {
- brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4];
+ uint32_t *times = bo_map + i * 3 * SHADER_TIME_STRIDE;
+
+ brw->shader_time.cumulative[i].time += times[SHADER_TIME_STRIDE * 0 / 4];
+ brw->shader_time.cumulative[i].written += times[SHADER_TIME_STRIDE * 1 / 4];
+ brw->shader_time.cumulative[i].reset += times[SHADER_TIME_STRIDE * 2 / 4];
}
/* Zero the BO out to clear it out for our next collection.
*/
- memset(times, 0, brw->shader_time.bo->size);
+ memset(bo_map, 0, brw->shader_time.bo->size);
drm_intel_bo_unmap(brw->shader_time.bo);
}
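
With this change each shader-time entry owns three SHADER_TIME_STRIDE-sized slots in the buffer object (time, written, reset), and brw_collect_shader_time() accumulates the first dword of each slot into the new struct shader_times. A self-contained sketch of that layout and readback; SHADER_TIME_STRIDE is assumed to be 64 bytes here purely for illustration, and collect_entry() is an invented stand-in for the loop above.

#include <cassert>
#include <cstdint>

#define SHADER_TIME_STRIDE 64   /* assumed value, for this sketch only */

struct shader_times { uint64_t time, written, reset; };

/* Entry i starts at byte offset i * 3 * SHADER_TIME_STRIDE; the counter of
 * interest is the first dword of each of its three slots. */
static void collect_entry(const void *bo_map, int i, shader_times *out)
{
   const uint32_t *times =
      (const uint32_t *)((const char *)bo_map + i * 3 * SHADER_TIME_STRIDE);

   out->time    += times[SHADER_TIME_STRIDE * 0 / 4];
   out->written += times[SHADER_TIME_STRIDE * 1 / 4];
   out->reset   += times[SHADER_TIME_STRIDE * 2 / 4];
}

int main()
{
   uint32_t bo[3 * SHADER_TIME_STRIDE / 4] = {};
   bo[SHADER_TIME_STRIDE * 0 / 4] = 100;   /* time */
   bo[SHADER_TIME_STRIDE * 1 / 4] = 7;     /* written */
   bo[SHADER_TIME_STRIDE * 2 / 4] = 2;     /* reset */

   shader_times acc = {};
   collect_entry(bo, 0, &acc);
   assert(acc.time == 100 && acc.written == 7 && acc.reset == 2);
   return 0;
}
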
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
index 667c9009304..aea4d9b77d3 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -66,10 +66,20 @@ brw_write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
void
brw_write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
{
- brw_emit_pipe_control_write(brw,
- PIPE_CONTROL_WRITE_DEPTH_COUNT
- | PIPE_CONTROL_DEPTH_STALL,
- query_bo, idx * sizeof(uint64_t), 0, 0);
+ uint32_t flags;
+
+ flags = (PIPE_CONTROL_WRITE_DEPTH_COUNT |
+ PIPE_CONTROL_DEPTH_STALL);
+
+ /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
+ * command when loading the values into the predicate source registers for
+ * conditional rendering.
+ */
+ if (brw->predicate.supported)
+ flags |= PIPE_CONTROL_FLUSH_ENABLE;
+
+ brw_emit_pipe_control_write(brw, flags, query_bo,
+ idx * sizeof(uint64_t), 0, 0);
}
/**
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index c03a8aed796..c8b134103bb 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -765,6 +765,22 @@ brw_ip_reg(void)
}
static inline struct brw_reg
+brw_notification_reg(void)
+{
+ return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_NOTIFICATION_COUNT,
+ 0,
+ 0,
+ 0,
+ BRW_REGISTER_TYPE_UD,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XXXX,
+ WRITEMASK_X);
+}
+
+static inline struct brw_reg
brw_acc_reg(unsigned width)
{
return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE,
@@ -778,7 +794,11 @@ brw_flag_reg(int reg, int subreg)
BRW_ARF_FLAG + reg, subreg);
}
-
+/**
+ * Return the mask register present on Gen4-5, or the related register present
+ * on Gen7.5 and later hardware, referred to as the "channel enable" register
+ * in the documentation.
+ */
static inline struct brw_reg
brw_mask_reg(unsigned subnr)
{
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 34f75fdd814..ee0add5d765 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -399,10 +399,10 @@ schedule_node::set_latency_gen7(bool is_haswell)
class instruction_scheduler {
public:
- instruction_scheduler(backend_visitor *v, int grf_count,
+ instruction_scheduler(backend_shader *s, int grf_count,
instruction_scheduler_mode mode)
{
- this->bv = v;
+ this->bs = s;
this->mem_ctx = ralloc_context(NULL);
this->grf_count = grf_count;
this->instructions.make_empty();
@@ -455,7 +455,7 @@ public:
int grf_count;
int time;
exec_list instructions;
- backend_visitor *bv;
+ backend_shader *bs;
instruction_scheduler_mode mode;
@@ -606,7 +606,7 @@ vec4_instruction_scheduler::get_register_pressure_benefit(backend_instruction *b
schedule_node::schedule_node(backend_instruction *inst,
instruction_scheduler *sched)
{
- const struct brw_device_info *devinfo = sched->bv->devinfo;
+ const struct brw_device_info *devinfo = sched->bs->devinfo;
this->inst = inst;
this->child_array_size = 0;
@@ -1384,7 +1384,7 @@ vec4_instruction_scheduler::issue_time(backend_instruction *inst)
void
instruction_scheduler::schedule_instructions(bblock_t *block)
{
- const struct brw_device_info *devinfo = bv->devinfo;
+ const struct brw_device_info *devinfo = bs->devinfo;
backend_instruction *inst = block->end();
time = 0;
@@ -1419,7 +1419,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
if (debug) {
fprintf(stderr, "clock %4d, scheduled: ", time);
- bv->dump_instruction(chosen->inst);
+ bs->dump_instruction(chosen->inst);
}
/* Now that we've scheduled a new instruction, some of its
@@ -1435,7 +1435,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
if (debug) {
fprintf(stderr, "\tchild %d, %d parents: ", i, child->parent_count);
- bv->dump_instruction(child->inst);
+ bs->dump_instruction(child->inst);
}
child->cand_generation = cand_generation;
@@ -1474,7 +1474,7 @@ instruction_scheduler::run(cfg_t *cfg)
if (debug) {
fprintf(stderr, "\nInstructions before scheduling (reg_alloc %d)\n",
post_reg_alloc);
- bv->dump_instructions();
+ bs->dump_instructions();
}
/* Populate the remaining GRF uses array to improve the pre-regalloc
@@ -1504,7 +1504,7 @@ instruction_scheduler::run(cfg_t *cfg)
if (debug) {
fprintf(stderr, "\nInstructions after scheduling (reg_alloc %d)\n",
post_reg_alloc);
- bv->dump_instructions();
+ bs->dump_instructions();
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index 014b43448ad..5d9892214a9 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -52,6 +52,12 @@ static void upload_sf_vp(struct brw_context *brw)
sizeof(*sfv), 32, &brw->sf.vp_offset);
memset(sfv, 0, sizeof(*sfv));
+ /* Accessing the fields Width and Height of gl_framebuffer to produce the
+ * values to program the viewport and scissor is fine as long as the
+    * gl_framebuffer has at least one attachment.
+ */
+ assert(ctx->DrawBuffer->_HasAttachments);
+
if (render_to_fbo) {
y_scale = 1.0;
y_bias = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index ebfb49acf8d..06393c8ff2b 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -32,16 +32,106 @@
#include "glsl/glsl_parser_extras.h"
#include "main/shaderapi.h"
+static void
+shader_debug_log_mesa(void *data, const char *fmt, ...)
+{
+ struct brw_context *brw = (struct brw_context *)data;
+ va_list args;
+
+ va_start(args, fmt);
+ GLuint msg_id = 0;
+ _mesa_gl_vdebug(&brw->ctx, &msg_id,
+ MESA_DEBUG_SOURCE_SHADER_COMPILER,
+ MESA_DEBUG_TYPE_OTHER,
+ MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args);
+ va_end(args);
+}
+
+static void
+shader_perf_log_mesa(void *data, const char *fmt, ...)
+{
+ struct brw_context *brw = (struct brw_context *)data;
+
+ va_list args;
+ va_start(args, fmt);
+
+ if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+ va_list args_copy;
+ va_copy(args_copy, args);
+ vfprintf(stderr, fmt, args_copy);
+ va_end(args_copy);
+ }
+
+ if (brw->perf_debug) {
+ GLuint msg_id = 0;
+ _mesa_gl_vdebug(&brw->ctx, &msg_id,
+ MESA_DEBUG_SOURCE_SHADER_COMPILER,
+ MESA_DEBUG_TYPE_PERFORMANCE,
+ MESA_DEBUG_SEVERITY_MEDIUM, fmt, args);
+ }
+ va_end(args);
+}
+
struct brw_compiler *
brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
{
struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler);
compiler->devinfo = devinfo;
+ compiler->shader_debug_log = shader_debug_log_mesa;
+ compiler->shader_perf_log = shader_perf_log_mesa;
brw_fs_alloc_reg_sets(compiler);
brw_vec4_alloc_reg_set(compiler);
+ if (devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
+ compiler->scalar_vs = true;
+
+ nir_shader_compiler_options *nir_options =
+ rzalloc(compiler, nir_shader_compiler_options);
+ nir_options->native_integers = true;
+   /* To allow better CSE at the NIR level we tell NIR to split all ffma
+    * instructions during opt_algebraic and then re-combine them in a later
+    * step.
+ */
+ nir_options->lower_ffma = true;
+ nir_options->lower_sub = true;
+
+ /* We want the GLSL compiler to emit code that uses condition codes */
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
+ compiler->glsl_compiler_options[i].MaxIfDepth =
+ devinfo->gen < 6 ? 16 : UINT_MAX;
+
+ compiler->glsl_compiler_options[i].EmitCondCodes = true;
+ compiler->glsl_compiler_options[i].EmitNoNoise = true;
+ compiler->glsl_compiler_options[i].EmitNoMainReturn = true;
+ compiler->glsl_compiler_options[i].EmitNoIndirectInput = true;
+ compiler->glsl_compiler_options[i].EmitNoIndirectOutput =
+ (i == MESA_SHADER_FRAGMENT);
+ compiler->glsl_compiler_options[i].EmitNoIndirectTemp =
+ (i == MESA_SHADER_FRAGMENT);
+ compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
+ compiler->glsl_compiler_options[i].LowerClipDistance = true;
+ }
+
+ compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = true;
+ compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
+
+ if (compiler->scalar_vs) {
+ /* If we're using the scalar backend for vertex shaders, we need to
+ * configure these accordingly.
+ */
+ compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
+ compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
+ compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = false;
+
+ compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions = nir_options;
+ }
+
+ compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions = nir_options;
+ compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions = nir_options;
+
return compiler;
}
@@ -97,7 +187,7 @@ is_scalar_shader_stage(struct brw_context *brw, int stage)
case MESA_SHADER_FRAGMENT:
return true;
case MESA_SHADER_VERTEX:
- return brw->scalar_vs;
+ return brw->intelScreen->compiler->scalar_vs;
default:
return false;
}
@@ -632,6 +722,8 @@ brw_instruction_name(enum opcode op)
return "gs_ff_sync_set_primitives";
case CS_OPCODE_CS_TERMINATE:
return "cs_terminate";
+ case SHADER_OPCODE_BARRIER:
+ return "barrier";
}
unreachable("not reached");
@@ -755,19 +847,22 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
return false;
}
-backend_visitor::backend_visitor(struct brw_context *brw,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- struct brw_stage_prog_data *stage_prog_data,
- gl_shader_stage stage)
- : brw(brw),
- devinfo(brw->intelScreen->devinfo),
- ctx(&brw->ctx),
+backend_shader::backend_shader(const struct brw_compiler *compiler,
+ void *log_data,
+ void *mem_ctx,
+ struct gl_shader_program *shader_prog,
+ struct gl_program *prog,
+ struct brw_stage_prog_data *stage_prog_data,
+ gl_shader_stage stage)
+ : compiler(compiler),
+ log_data(log_data),
+ devinfo(compiler->devinfo),
shader(shader_prog ?
(struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL),
shader_prog(shader_prog),
prog(prog),
stage_prog_data(stage_prog_data),
+ mem_ctx(mem_ctx),
cfg(NULL),
stage(stage)
{
@@ -950,7 +1045,6 @@ backend_instruction::can_do_saturate() const
case BRW_OPCODE_LINE:
case BRW_OPCODE_LRP:
case BRW_OPCODE_MAC:
- case BRW_OPCODE_MACH:
case BRW_OPCODE_MAD:
case BRW_OPCODE_MATH:
case BRW_OPCODE_MOV:
@@ -1060,6 +1154,7 @@ backend_instruction::has_side_effects() const
case SHADER_OPCODE_MEMORY_FENCE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case FS_OPCODE_FB_WRITE:
+ case SHADER_OPCODE_BARRIER:
return true;
default:
return false;
@@ -1148,13 +1243,13 @@ backend_instruction::remove(bblock_t *block)
}
void
-backend_visitor::dump_instructions()
+backend_shader::dump_instructions()
{
dump_instructions(NULL);
}
void
-backend_visitor::dump_instructions(const char *name)
+backend_shader::dump_instructions(const char *name)
{
FILE *file = stderr;
if (name && geteuid() != 0) {
@@ -1183,7 +1278,7 @@ backend_visitor::dump_instructions(const char *name)
}
void
-backend_visitor::calculate_cfg()
+backend_shader::calculate_cfg()
{
if (this->cfg)
return;
@@ -1191,7 +1286,7 @@ backend_visitor::calculate_cfg()
}
void
-backend_visitor::invalidate_cfg()
+backend_shader::invalidate_cfg()
{
ralloc_free(this->cfg);
this->cfg = NULL;
@@ -1206,7 +1301,7 @@ backend_visitor::invalidate_cfg()
* trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
*/
void
-backend_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table_offset)
+backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_offset)
{
int num_textures = _mesa_fls(prog->SamplersUsed);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 59a0eff824e..b2c1a0b8d69 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -86,6 +86,12 @@ struct brw_compiler {
*/
int aligned_pairs_class;
} fs_reg_sets[2];
+
+ void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
+ void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
+
+ bool scalar_vs;
+ struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
};
enum PACKED register_file {
@@ -211,20 +217,23 @@ enum instruction_scheduler_mode {
SCHEDULE_POST,
};
-class backend_visitor : public ir_visitor {
+class backend_shader {
protected:
- backend_visitor(struct brw_context *brw,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- struct brw_stage_prog_data *stage_prog_data,
- gl_shader_stage stage);
+ backend_shader(const struct brw_compiler *compiler,
+ void *log_data,
+ void *mem_ctx,
+ struct gl_shader_program *shader_prog,
+ struct gl_program *prog,
+ struct brw_stage_prog_data *stage_prog_data,
+ gl_shader_stage stage);
public:
- struct brw_context * const brw;
+ const struct brw_compiler *compiler;
+ void *log_data; /* Passed to compiler->*_log functions */
+
const struct brw_device_info * const devinfo;
- struct gl_context * const ctx;
struct brw_shader * const shader;
struct gl_shader_program * const shader_prog;
struct gl_program * const prog;
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 26fdae64ea4..987672f8815 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -229,11 +229,14 @@ void brw_destroy_caches( struct brw_context *brw );
#define BRW_BATCH_STRUCT(brw, s) \
intel_batchbuffer_data(brw, (s), sizeof(*(s)), RENDER_RING)
-void *brw_state_batch(struct brw_context *brw,
- enum aub_state_struct_type type,
- int size,
- int alignment,
- uint32_t *out_offset);
+void *__brw_state_batch(struct brw_context *brw,
+ enum aub_state_struct_type type,
+ int size,
+ int alignment,
+ int index,
+ uint32_t *out_offset);
+#define brw_state_batch(brw, type, size, alignment, out_offset) \
+ __brw_state_batch(brw, type, size, alignment, 0, out_offset)
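The macro keeps the old call signature working by forwarding to __brw_state_batch with an index of 0. A minimal sketch of the same wrapper pattern, using hypothetical names (alloc_state/alloc_state_indexed) rather than the driver's:

    #include <stdio.h>

    /* Hypothetical stand-ins showing only the forwarding pattern: existing
     * callers keep their argument list and implicitly pass index 0, while
     * new callers may pass an explicit index.
     */
    static void alloc_state_indexed(int size, int alignment, int index)
    {
       printf("size=%d align=%d index=%d\n", size, alignment, index);
    }

    #define alloc_state(size, alignment) \
       alloc_state_indexed(size, alignment, 0)

    int main(void)
    {
       alloc_state(64, 32);            /* old-style call, index defaults to 0 */
       alloc_state_indexed(64, 32, 3); /* new-style call with an explicit index */
       return 0;
    }
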
/* brw_wm_surface_state.c */
void gen4_init_vtable_surface_functions(struct brw_context *brw);
@@ -246,6 +249,7 @@ void brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
unsigned *pitch, uint32_t *tiling,
unsigned *format);
+const char *brw_surface_format_name(unsigned format);
uint32_t brw_format_for_mesa_format(mesa_format mesa_format);
GLuint translate_tex_target(GLenum target);
diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c
index 45dca69823f..a405a80ef6e 100644
--- a/src/mesa/drivers/dri/i965/brw_state_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_state_batch.c
@@ -38,7 +38,8 @@ static void
brw_track_state_batch(struct brw_context *brw,
enum aub_state_struct_type type,
uint32_t offset,
- int size)
+ int size,
+ int index)
{
struct intel_batchbuffer *batch = &brw->batch;
@@ -53,6 +54,7 @@ brw_track_state_batch(struct brw_context *brw,
brw->state_batch_list[brw->state_batch_count].offset = offset;
brw->state_batch_list[brw->state_batch_count].size = size;
brw->state_batch_list[brw->state_batch_count].type = type;
+ brw->state_batch_list[brw->state_batch_count].index = index;
brw->state_batch_count++;
}
@@ -108,18 +110,20 @@ brw_annotate_aub(struct brw_context *brw)
* margin (4096 bytes, even if the object is just a 20-byte surface
* state), and more buffers to walk and count for aperture size checking.
*
- * However, due to the restrictions inposed by the aperture size
+ * However, due to the restrictions imposed by the aperture size
* checking performance hacks, we can't have the batch point at a
* separate indirect state buffer, because once the batch points at
* it, no more relocations can be added to it. So, we sneak these
* buffers in at the top of the batchbuffer.
*/
void *
-brw_state_batch(struct brw_context *brw,
- enum aub_state_struct_type type,
- int size,
- int alignment,
- uint32_t *out_offset)
+__brw_state_batch(struct brw_context *brw,
+ enum aub_state_struct_type type,
+ int size,
+ int alignment,
+ int index,
+ uint32_t *out_offset)
{
struct intel_batchbuffer *batch = &brw->batch;
uint32_t offset;
@@ -140,7 +144,7 @@ brw_state_batch(struct brw_context *brw,
batch->state_batch_offset = offset;
if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_AUB)))
- brw_track_state_batch(brw, type, offset, size);
+ brw_track_state_batch(brw, type, offset, size, index);
*out_offset = offset;
return batch->map + (offset>>2);
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index 530f5a8b76e..b6f4d598e1d 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -1,5 +1,5 @@
/*
- * Copyright © 2007 Intel Corporation
+ * Copyright © 2007-2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -31,6 +31,41 @@
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_eu.h"
+#include "brw_state.h"
+
+static const char *sampler_mip_filter[] = {
+ "NONE",
+ "NEAREST",
+ "RSVD",
+ "LINEAR"
+};
+
+static const char *sampler_mag_filter[] = {
+ "NEAREST",
+ "LINEAR",
+ "ANISOTROPIC",
+ "FLEXIBLE (GEN8+)",
+ "RSVD", "RSVD",
+ "MONO",
+ "RSVD"
+};
+
+static const char *sampler_addr_mode[] = {
+ "WRAP",
+ "MIRROR",
+ "CLAMP",
+ "CUBE",
+ "CLAMP_BORDER",
+ "MIRROR_ONCE",
+ "HALF_BORDER"
+};
+
+static const char *surface_tiling[] = {
+ "LINEAR",
+ "W-tiled",
+ "X-tiled",
+ "Y-tiled"
+};
static void
batch_out(struct brw_context *brw, const char *name, uint32_t offset,
@@ -50,6 +85,25 @@ batch_out(struct brw_context *brw, const char *name, uint32_t offset,
va_end(va);
}
+static void
+batch_out64(struct brw_context *brw, const char *name, uint32_t offset,
+ int index, char *fmt, ...)
+{
+ uint32_t *tmp = brw->batch.bo->virtual + offset;
+
+ /* Combine the two dwords into a single 64-bit value; the data is
+ * emitted as dwords with the low dword first.
+ */
+ uint64_t data = ((uint64_t)tmp[index + 1]) << 32 | tmp[index];
+ va_list va;
+
+ fprintf(stderr, "0x%08x: 0x%016" PRIx64 ": %8s: ",
+ offset + index * 4, data, name);
+ va_start(va, fmt);
+ vfprintf(stderr, fmt, va);
+ va_end(va);
+}
+
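A self-contained illustration of how batch_out64 combines two consecutive dwords (low dword first) into one 64-bit value, independent of the batch-buffer plumbing; the values are made up:

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
       /* Two consecutive dwords as they would appear in the batch, low first. */
       uint32_t dw[2] = { 0x89abcdef, 0x01234567 };

       uint64_t data = ((uint64_t)dw[1]) << 32 | dw[0];

       printf("0x%016" PRIx64 "\n", data); /* prints 0x0123456789abcdef */
       return 0;
    }
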
static const char *
get_965_surfacetype(unsigned int surfacetype)
{
@@ -64,19 +118,6 @@ get_965_surfacetype(unsigned int surfacetype)
}
}
-static const char *
-get_965_surface_format(unsigned int surface_format)
-{
- switch (surface_format) {
- case 0x000: return "r32g32b32a32_float";
- case 0x0c1: return "b8g8r8a8_unorm";
- case 0x100: return "b5g6r5_unorm";
- case 0x102: return "b5g5r5a1_unorm";
- case 0x104: return "b4g4r4a4_unorm";
- default: return "unknown";
- }
-}
-
static void dump_vs_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "VS_STATE";
@@ -176,7 +217,7 @@ static void dump_surface_state(struct brw_context *brw, uint32_t offset)
batch_out(brw, name, offset, 0, "%s %s\n",
get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)),
- get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)));
+ brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)));
batch_out(brw, name, offset, 1, "offset\n");
batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n",
GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1,
@@ -200,7 +241,7 @@ static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset)
batch_out(brw, name, offset, 0, "%s %s %s\n",
get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)),
- get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)),
+ brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)),
(surf[0] & GEN7_SURFACE_IS_ARRAY) ? "array" : "");
batch_out(brw, name, offset, 1, "offset\n");
batch_out(brw, name, offset, 2, "%dx%d size, %d mips, %d slices\n",
@@ -222,6 +263,87 @@ static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset)
batch_out(brw, name, offset, 7, "\n");
}
+static float q_to_float(uint32_t data, int integer_end, int integer_start,
+ int fractional_end, int fractional_start)
+{
+ /* Convert the number to floating point. */
+ float n = GET_BITS(data, integer_start, fractional_end);
+
+ /* Scale by 2^-(number of fractional bits). */
+ return n * exp2(-(fractional_end - fractional_start + 1));
+}
+
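The q_to_float() helper is used below for fields such as the Base Mip Level, which the q_to_float(surf[1], 23, 20, 19, 19) call treats as an unsigned fixed-point value with four integer bits and one fractional bit. A standalone worked example of that kind of conversion (the value and field placement are hypothetical):

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Extract bits [high:low] of a dword, like GET_BITS() in the driver. */
    static uint32_t get_bits(uint32_t data, int high, int low)
    {
       return (data >> low) & ((1u << (high - low + 1)) - 1u);
    }

    int main(void)
    {
       /* A dword whose bits 23:19 hold the raw value 11 (binary 0101.1),
        * i.e. a base mip level of 5.5 in unsigned 4.1 fixed point.
        */
       uint32_t dw1 = 11u << 19;

       float base_mip = get_bits(dw1, 23, 19) * exp2f(-1); /* one fractional bit */

       printf("%.1f\n", base_mip); /* 5.5 */
       return 0;
    }
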
+static void
+dump_gen8_surface_state(struct brw_context *brw, uint32_t offset, int index)
+{
+ uint32_t *surf = brw->batch.bo->virtual + offset;
+ int aux_mode = surf[6] & INTEL_MASK(2, 0);
+ const char *aux_str;
+ char *name;
+
+ if (brw->gen >= 9 && (aux_mode == 1 || aux_mode == 5)) {
+ bool msrt = GET_BITS(surf[4], 5, 3) > 0;
+ bool compression = GET_FIELD(surf[7], GEN9_SURFACE_RT_COMPRESSION) == 1;
+ aux_str = ralloc_asprintf(NULL, "AUX_CCS_%c (%s, MULTISAMPLE_COUNT%c1)",
+ (aux_mode == 1) ? 'D' : 'E',
+ compression ? "Compressed RT" : "Uncompressed",
+ msrt ? '>' : '=');
+ } else {
+ static const char *surface_aux_mode[] = { "AUX_NONE", "AUX_MCS",
+ "AUX_APPEND", "AUX_HIZ",
+ "RSVD", "RSVD"};
+ aux_str = ralloc_asprintf(NULL, "%s", surface_aux_mode[aux_mode]);
+ }
+
+ name = ralloc_asprintf(NULL, "SURF%03d", index);
+ batch_out(brw, name, offset, 0, "%s %s %s VALIGN%d HALIGN%d %s\n",
+ get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)),
+ brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)),
+ (surf[0] & GEN7_SURFACE_IS_ARRAY) ? "array" : "",
+ 1 << (GET_BITS(surf[0], 17, 16) + 1), /* VALIGN */
+ 1 << (GET_BITS(surf[0], 15, 14) + 1), /* HALIGN */
+ surface_tiling[GET_BITS(surf[0], 13, 12)]);
+ batch_out(brw, name, offset, 1, "MOCS: 0x%x Base MIP: %.1f (%u mips) Surface QPitch: %d\n",
+ GET_FIELD(surf[1], GEN8_SURFACE_MOCS),
+ q_to_float(surf[1], 23, 20, 19, 19),
+ surf[5] & INTEL_MASK(3, 0),
+ GET_FIELD(surf[1], GEN8_SURFACE_QPITCH) << 2);
+ batch_out(brw, name, offset, 2, "%dx%d [%s]\n",
+ GET_FIELD(surf[2], GEN7_SURFACE_WIDTH) + 1,
+ GET_FIELD(surf[2], GEN7_SURFACE_HEIGHT) + 1,
+ aux_str);
+ batch_out(brw, name, offset, 3, "%d slices (depth), pitch: %d\n",
+ GET_FIELD(surf[3], BRW_SURFACE_DEPTH) + 1,
+ (surf[3] & INTEL_MASK(17, 0)) + 1);
+ batch_out(brw, name, offset, 4, "min array element: %d, array extent %d, MULTISAMPLE_%d\n",
+ GET_FIELD(surf[4], GEN7_SURFACE_MIN_ARRAY_ELEMENT),
+ GET_FIELD(surf[4], GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT) + 1,
+ 1 << GET_BITS(surf[4], 5, 3));
+ batch_out(brw, name, offset, 5, "x,y offset: %d,%d, min LOD: %d\n",
+ GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET),
+ GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET),
+ GET_FIELD(surf[5], GEN7_SURFACE_MIN_LOD));
+ batch_out(brw, name, offset, 6, "AUX pitch: %d qpitch: %d\n",
+ GET_FIELD(surf[6], GEN8_SURFACE_AUX_QPITCH) << 2,
+ GET_FIELD(surf[6], GEN8_SURFACE_AUX_PITCH) << 2);
+ if (brw->gen >= 9) {
+ batch_out(brw, name, offset, 7, "Clear color: R(%x)G(%x)B(%x)A(%x)\n",
+ surf[12], surf[13], surf[14], surf[15]);
+ } else {
+ batch_out(brw, name, offset, 7, "Clear color: %c%c%c%c\n",
+ GET_BITS(surf[7], 31, 31) ? 'R' : '-',
+ GET_BITS(surf[7], 30, 30) ? 'G' : '-',
+ GET_BITS(surf[7], 29, 29) ? 'B' : '-',
+ GET_BITS(surf[7], 28, 28) ? 'A' : '-');
+ }
+
+ for (int i = 8; i < 12; i++)
+ batch_out(brw, name, offset, i, "0x%08x\n", surf[i]);
+
+ ralloc_free((void *)aux_str);
+ ralloc_free(name);
+}
+
static void
dump_sdc(struct brw_context *brw, uint32_t offset)
{
@@ -229,7 +351,7 @@ dump_sdc(struct brw_context *brw, uint32_t offset)
if (brw->gen >= 5 && brw->gen <= 6) {
struct gen5_sampler_default_color *sdc = (brw->batch.bo->virtual +
- offset);
+ offset);
batch_out(brw, name, offset, 0, "unorm rgba\n");
batch_out(brw, name, offset, 1, "r %f\n", sdc->f[0]);
batch_out(brw, name, offset, 2, "b %f\n", sdc->f[1]);
@@ -271,6 +393,45 @@ static void dump_sampler_state(struct brw_context *brw,
}
}
+static void gen7_dump_sampler_state(struct brw_context *brw,
+ uint32_t offset, uint32_t size)
+{
+ const uint32_t *samp = brw->batch.bo->virtual + offset;
+ char name[20];
+
+ for (int i = 0; i < size / 16; i++) {
+ sprintf(name, "SAMPLER_STATE %d", i);
+ batch_out(brw, name, offset, i,
+ "Disabled = %s, Base Mip: %u.%u, Mip/Mag/Min Filter: %s/%s/%s, LOD Bias: %d.%d\n",
+ GET_BITS(samp[0], 31, 31) ? "yes" : "no",
+ GET_BITS(samp[0], 26, 23),
+ GET_BITS(samp[0], 22, 22),
+ sampler_mip_filter[GET_FIELD(samp[0], BRW_SAMPLER_MIP_FILTER)],
+ sampler_mag_filter[GET_FIELD(samp[0], BRW_SAMPLER_MAG_FILTER)],
+ /* min filter defs are the same as mag */
+ sampler_mag_filter[GET_FIELD(samp[0], BRW_SAMPLER_MIN_FILTER)],
+ GET_BITS(samp[0], 13, 10),
+ GET_BITS(samp[0], 9, 1)
+ );
+ batch_out(brw, name, offset, i+1, "Min LOD: %u.%u, Max LOD: %u.%u\n",
+ GET_BITS(samp[1], 31, 28),
+ GET_BITS(samp[1], 27, 20),
+ GET_BITS(samp[1], 19, 16),
+ GET_BITS(samp[1], 15, 8)
+ );
+ batch_out(brw, name, offset, i+2, "Border Color\n"); /* FINISHME: gen8+ */
+ batch_out(brw, name, offset, i+3, "Max aniso: RATIO %d:1, TC[XYZ] Address Control: %s|%s|%s\n",
+ (GET_FIELD(samp[3], BRW_SAMPLER_MAX_ANISOTROPY) + 1) * 2,
+ sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCX_WRAP_MODE)],
+ sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCY_WRAP_MODE)],
+ sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCZ_WRAP_MODE)]
+ );
+
+ samp += 4;
+ offset += 4 * sizeof(uint32_t);
+ }
+}
+
static void dump_sf_viewport_state(struct brw_context *brw,
uint32_t offset)
{
@@ -320,10 +481,17 @@ static void dump_sf_clip_viewport_state(struct brw_context *brw,
batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30);
batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31);
batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32);
- batch_out(brw, name, offset, 6, "guardband xmin = %f\n", vp->guardband.xmin);
- batch_out(brw, name, offset, 7, "guardband xmax = %f\n", vp->guardband.xmax);
- batch_out(brw, name, offset, 8, "guardband ymin = %f\n", vp->guardband.ymin);
- batch_out(brw, name, offset, 9, "guardband ymax = %f\n", vp->guardband.ymax);
+ batch_out(brw, name, offset, 8, "guardband xmin = %f\n", vp->guardband.xmin);
+ batch_out(brw, name, offset, 9, "guardband xmax = %f\n", vp->guardband.xmax);
+ batch_out(brw, name, offset, 10, "guardband ymin = %f\n", vp->guardband.ymin);
+ batch_out(brw, name, offset, 11, "guardband ymax = %f\n", vp->guardband.ymax);
+ if (brw->gen >= 8) {
+ float *cc_vp = brw->batch.bo->virtual + offset;
+ batch_out(brw, name, offset, 12, "Min extents: %.2fx%.2f\n",
+ cc_vp[12], cc_vp[14]);
+ batch_out(brw, name, offset, 14, "Max extents: %.2fx%.2f\n",
+ cc_vp[13], cc_vp[15]);
+ }
}
@@ -398,6 +566,92 @@ static void dump_blend_state(struct brw_context *brw, uint32_t offset)
}
static void
+gen8_dump_blend_state(struct brw_context *brw, uint32_t offset, uint32_t size)
+{
+ const uint32_t *blend = brw->batch.bo->virtual + offset;
+ const char *logicop[] =
+ {
+ "LOGICOP_CLEAR (BLACK)",
+ "LOGICOP_NOR",
+ "LOGICOP_AND_INVERTED",
+ "LOGICOP_COPY_INVERTED",
+ "LOGICOP_AND_REVERSE",
+ "LOGICOP_INVERT",
+ "LOGICOP_XOR",
+ "LOGICOP_NAND",
+ "LOGICOP_AND",
+ "LOGICOP_EQUIV",
+ "LOGICOP_NOOP",
+ "LOGICOP_OR_INVERTED",
+ "LOGICOP_COPY",
+ "LOGICOP_OR_REVERSE",
+ "LOGICOP_OR",
+ "LOGICOP_SET (WHITE)"
+ };
+
+ const char *blend_function[] =
+ { "ADD", "SUBTRACT", "REVERSE_SUBTRACT", "MIN", "MAX};" };
+
+ const char *blend_factor[0x1b] =
+ {
+ "RSVD",
+ "ONE",
+ "SRC_COLOR", "SRC_ALPHA",
+ "DST_ALPHA", "DST_COLOR",
+ "SRC_ALPHA_SATURATE",
+ "CONST_COLOR", "CONST_ALPHA",
+ "SRC1_COLOR", "SRC1_ALPHA",
+ "RSVD", "RSVD", "RSVD", "RSVD", "RSVD", "RSVD",
+ "ZERO",
+ "INV_SRC_COLOR", "INV_SRC_ALPHA",
+ "INV_DST_ALPHA", "INV_DST_COLOR",
+ "RSVD",
+ "INV_CONST_COLOR", "INV_CONST_ALPHA",
+ "INV_SRC1_COLOR", "INV_SRC1_ALPHA"
+ };
+
+ batch_out(brw, "BLEND", offset, 0, "Alpha blend/test\n");
+
+ if (((size) % 2) != 0)
+ fprintf(stderr, "Invalid blend state size %d\n", size);
+
+ for (int i = 1; i < size / 4; i += 2) {
+ char name[sizeof("BLEND_ENTRYXXX")];
+ sprintf(name, "BLEND_ENTRY%02d", (i - 1) / 2);
+ if (blend[i + 1] & GEN8_BLEND_LOGIC_OP_ENABLE) {
+ batch_out(brw, name, offset, i + 1, "%s\n",
+ logicop[GET_FIELD(blend[i + 1],
+ GEN8_BLEND_LOGIC_OP_FUNCTION)]);
+ } else if (blend[i] & GEN8_BLEND_COLOR_BUFFER_BLEND_ENABLE) {
+ batch_out64(brw, name, offset, i,
+ "\n\t\t\tColor Buffer Blend factor %s,%s,%s,%s (src,dst,src alpha, dst alpha)"
+ "\n\t\t\tfunction %s,%s (color, alpha), Disables: %c%c%c%c\n",
+ blend_factor[GET_FIELD(blend[i],
+ GEN8_BLEND_SRC_BLEND_FACTOR)],
+ blend_factor[GET_FIELD(blend[i],
+ GEN8_BLEND_DST_BLEND_FACTOR)],
+ blend_factor[GET_FIELD(blend[i],
+ GEN8_BLEND_SRC_ALPHA_BLEND_FACTOR)],
+ blend_factor[GET_FIELD(blend[i],
+ GEN8_BLEND_DST_ALPHA_BLEND_FACTOR)],
+ blend_function[GET_FIELD(blend[i],
+ GEN8_BLEND_COLOR_BLEND_FUNCTION)],
+ blend_function[GET_FIELD(blend[i],
+ GEN8_BLEND_ALPHA_BLEND_FUNCTION)],
+ blend[i] & GEN8_BLEND_WRITE_DISABLE_RED ? 'R' : '-',
+ blend[i] & GEN8_BLEND_WRITE_DISABLE_GREEN ? 'G' : '-',
+ blend[i] & GEN8_BLEND_WRITE_DISABLE_BLUE ? 'B' : '-',
+ blend[i] & GEN8_BLEND_WRITE_DISABLE_ALPHA ? 'A' : '-'
+ );
+ } else if (!blend[i] && (blend[i + 1] == 0xb)) {
+ batch_out64(brw, name, offset, i, "NOP blend state\n");
+ } else {
+ batch_out64(brw, name, offset, i, "????\n");
+ }
+ }
+}
+
+static void
dump_scissor(struct brw_context *brw, uint32_t offset)
{
const char *name = "SCISSOR";
@@ -555,20 +809,29 @@ dump_state_batch(struct brw_context *brw)
dump_cc_state_gen4(brw, offset);
break;
case AUB_TRACE_BLEND_STATE:
- dump_blend_state(brw, offset);
+ if (brw->gen >= 8)
+ gen8_dump_blend_state(brw, offset, size);
+ else
+ dump_blend_state(brw, offset);
break;
case AUB_TRACE_BINDING_TABLE:
dump_binding_table(brw, offset, size);
break;
case AUB_TRACE_SURFACE_STATE:
- if (brw->gen < 7) {
- dump_surface_state(brw, offset);
- } else {
+ if (brw->gen >= 8) {
+ dump_gen8_surface_state(brw, offset,
+ brw->state_batch_list[i].index);
+ } else if (brw->gen >= 7) {
dump_gen7_surface_state(brw, offset);
- }
+ } else {
+ dump_surface_state(brw, offset);
+ }
break;
case AUB_TRACE_SAMPLER_STATE:
- dump_sampler_state(brw, offset, size);
+ if (brw->gen >= 7)
+ gen7_dump_sampler_state(brw, offset, size);
+ else
+ dump_sampler_state(brw, offset, size);
break;
case AUB_TRACE_SAMPLER_DEFAULT_COLOR:
dump_sdc(brw, offset);
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 84b0861aaad..08d1ac28885 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -41,6 +41,7 @@
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
+#include "main/framebuffer.h"
static const struct brw_tracked_state *gen4_atoms[] =
{
@@ -660,6 +661,7 @@ brw_upload_pipeline_state(struct brw_context *brw,
int i;
static int dirty_count = 0;
struct brw_state_flags state = brw->state.pipelines[pipeline];
+ unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
brw_select_pipeline(brw, pipeline);
@@ -696,8 +698,8 @@ brw_upload_pipeline_state(struct brw_context *brw,
brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
}
- if (brw->num_samples != ctx->DrawBuffer->Visual.samples) {
- brw->num_samples = ctx->DrawBuffer->Visual.samples;
+ if (brw->num_samples != fb_samples) {
+ brw->num_samples = fb_samples;
brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
}
diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c
index 016f87a4c2a..05016067bba 100644
--- a/src/mesa/drivers/dri/i965/brw_surface_formats.c
+++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c
@@ -39,13 +39,14 @@ struct surface_format_info {
int input_vb;
int streamed_output_vb;
int color_processing;
+ const char *name;
};
/* This macro allows us to write the table almost as it appears in the PRM,
* while restructuring it to turn it into the C code we want.
*/
#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, sf) \
- [sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color },
+ [BRW_SURFACEFORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, #sf},
#define Y 0
#define x 999
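The reworked SF() macro derives both the designated-initializer index (via ## token pasting) and the printable name (via # stringification) from a single token. A minimal standalone sketch of the same trick, with hypothetical names:

    #include <stdio.h>

    enum fmt {
       FMT_R32G32B32A32_FLOAT,
       FMT_B8G8R8A8_UNORM,
       FMT_COUNT
    };

    struct fmt_info {
       int exists;
       const char *name;
    };

    /* Paste the prefix to build the enum index, stringify the token for the name. */
    #define F(f) [FMT_##f] = { 1, #f },

    static const struct fmt_info fmt_table[FMT_COUNT] = {
       F(R32G32B32A32_FLOAT)
       F(B8G8R8A8_UNORM)
    };

    int main(void)
    {
       printf("%s\n", fmt_table[FMT_B8G8R8A8_UNORM].name); /* B8G8R8A8_UNORM */
       return 0;
    }
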
@@ -73,6 +74,7 @@ struct surface_format_info {
* VB - Input Vertex Buffer
* SO - Streamed Output Vertex Buffers (transform feedback)
* color - Color Processing
+ * sf - Surface Format
*
* See page 88 of the Sandybridge PRM VOL4_Part1 PDF.
*
@@ -85,230 +87,236 @@ struct surface_format_info {
*/
const struct surface_format_info surface_formats[] = {
/* smpl filt shad CK RT AB VB SO color */
- SF( Y, 50, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_FLOAT)
- SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_SINT)
- SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_UINT)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SNORM)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64_FLOAT)
- SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32X32_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_USCALED)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SFIXED)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64_PASSTHRU)
- SF( Y, 50, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_FLOAT)
- SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_SINT)
- SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_UINT)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SNORM)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_USCALED)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32_SFIXED)
- SF( Y, Y, x, x, Y, 45, Y, x, 60, BRW_SURFACEFORMAT_R16G16B16A16_UNORM)
- SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_UINT)
- SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_FLOAT)
- SF( Y, 50, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32_FLOAT)
- SF( Y, 70, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32_FLOAT_LD)
- SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32_SINT)
- SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32_UINT)
- SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS)
- SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT)
- SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32A32_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_SNORM)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64_FLOAT)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16X16_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16X16_FLOAT)
- SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A32X32_FLOAT)
- SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32X32_FLOAT)
- SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I32X32_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_USCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_USCALED)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32_SFIXED)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64_PASSTHRU)
- SF( Y, Y, x, Y, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_B8G8R8A8_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB)
+ SF( Y, 50, x, x, Y, Y, Y, Y, x, R32G32B32A32_FLOAT)
+ SF( Y, x, x, x, Y, x, Y, Y, x, R32G32B32A32_SINT)
+ SF( Y, x, x, x, Y, x, Y, Y, x, R32G32B32A32_UINT)
+ SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_SNORM)
+ SF( x, x, x, x, x, x, Y, x, x, R64G64_FLOAT)
+ SF( Y, 50, x, x, x, x, x, x, x, R32G32B32X32_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_USCALED)
+ SF( x, x, x, x, x, x, x, x, x, R32G32B32A32_SFIXED)
+ SF( x, x, x, x, x, x, x, x, x, R64G64_PASSTHRU)
+ SF( Y, 50, x, x, x, x, Y, Y, x, R32G32B32_FLOAT)
+ SF( Y, x, x, x, x, x, Y, Y, x, R32G32B32_SINT)
+ SF( Y, x, x, x, x, x, Y, Y, x, R32G32B32_UINT)
+ SF( x, x, x, x, x, x, Y, x, x, R32G32B32_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, R32G32B32_SNORM)
+ SF( x, x, x, x, x, x, Y, x, x, R32G32B32_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R32G32B32_USCALED)
+ SF( x, x, x, x, x, x, x, x, x, R32G32B32_SFIXED)
+ SF( Y, Y, x, x, Y, 45, Y, x, 60, R16G16B16A16_UNORM)
+ SF( Y, Y, x, x, Y, 60, Y, x, x, R16G16B16A16_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, R16G16B16A16_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, R16G16B16A16_UINT)
+ SF( Y, Y, x, x, Y, Y, Y, x, x, R16G16B16A16_FLOAT)
+ SF( Y, 50, x, x, Y, Y, Y, Y, x, R32G32_FLOAT)
+ SF( Y, 70, x, x, Y, Y, Y, Y, x, R32G32_FLOAT_LD)
+ SF( Y, x, x, x, Y, x, Y, Y, x, R32G32_SINT)
+ SF( Y, x, x, x, Y, x, Y, Y, x, R32G32_UINT)
+ SF( Y, 50, Y, x, x, x, x, x, x, R32_FLOAT_X8X24_TYPELESS)
+ SF( Y, x, x, x, x, x, x, x, x, X32_TYPELESS_G8X24_UINT)
+ SF( Y, 50, x, x, x, x, x, x, x, L32A32_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, R32G32_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, R32G32_SNORM)
+ SF( x, x, x, x, x, x, Y, x, x, R64_FLOAT)
+ SF( Y, Y, x, x, x, x, x, x, x, R16G16B16X16_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, R16G16B16X16_FLOAT)
+ SF( Y, 50, x, x, x, x, x, x, x, A32X32_FLOAT)
+ SF( Y, 50, x, x, x, x, x, x, x, L32X32_FLOAT)
+ SF( Y, 50, x, x, x, x, x, x, x, I32X32_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, R16G16B16A16_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R16G16B16A16_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R32G32_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R32G32_USCALED)
+ SF( x, x, x, x, x, x, x, x, x, R32G32_SFIXED)
+ SF( x, x, x, x, x, x, x, x, x, R64_PASSTHRU)
+ SF( Y, Y, x, Y, Y, Y, Y, x, 60, B8G8R8A8_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, x, B8G8R8A8_UNORM_SRGB)
/* smpl filt shad CK RT AB VB SO color */
- SF( Y, Y, x, x, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_R10G10B10A2_UNORM)
- SF( Y, Y, x, x, x, x, x, x, 60, BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB)
- SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R10G10B10A2_UINT)
- SF( Y, Y, x, x, x, Y, Y, x, x, BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM)
- SF( Y, Y, x, x, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_R8G8B8A8_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB)
- SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_UINT)
- SF( Y, Y, x, x, Y, 45, Y, x, x, BRW_SURFACEFORMAT_R16G16_UNORM)
- SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16G16_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_UINT)
- SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16G16_FLOAT)
- SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_B10G10R10A2_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB)
- SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R11G11B10_FLOAT)
- SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32_SINT)
- SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32_UINT)
- SF( Y, 50, Y, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32_FLOAT)
- SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS)
- SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16A16_UNORM)
- SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I24X8_UNORM)
- SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L24X8_UNORM)
- SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A24X8_UNORM)
- SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I32_FLOAT)
- SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32_FLOAT)
- SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A32_FLOAT)
- SF( Y, Y, x, Y, x, x, x, x, 60, BRW_SURFACEFORMAT_B8G8R8X8_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8X8_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10X2_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16A16_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_SNORM)
+ SF( Y, Y, x, x, Y, Y, Y, x, 60, R10G10B10A2_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, 60, R10G10B10A2_UNORM_SRGB)
+ SF( Y, x, x, x, Y, x, Y, x, x, R10G10B10A2_UINT)
+ SF( Y, Y, x, x, x, Y, Y, x, x, R10G10B10_SNORM_A2_UNORM)
+ SF( Y, Y, x, x, Y, Y, Y, x, 60, R8G8B8A8_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, 60, R8G8B8A8_UNORM_SRGB)
+ SF( Y, Y, x, x, Y, 60, Y, x, x, R8G8B8A8_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, R8G8B8A8_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, R8G8B8A8_UINT)
+ SF( Y, Y, x, x, Y, 45, Y, x, x, R16G16_UNORM)
+ SF( Y, Y, x, x, Y, 60, Y, x, x, R16G16_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, R16G16_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, R16G16_UINT)
+ SF( Y, Y, x, x, Y, Y, Y, x, x, R16G16_FLOAT)
+ SF( Y, Y, x, x, Y, Y, x, x, 60, B10G10R10A2_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, 60, B10G10R10A2_UNORM_SRGB)
+ SF( Y, Y, x, x, Y, Y, Y, x, x, R11G11B10_FLOAT)
+ SF( Y, x, x, x, Y, x, Y, Y, x, R32_SINT)
+ SF( Y, x, x, x, Y, x, Y, Y, x, R32_UINT)
+ SF( Y, 50, Y, x, Y, Y, Y, Y, x, R32_FLOAT)
+ SF( Y, 50, Y, x, x, x, x, x, x, R24_UNORM_X8_TYPELESS)
+ SF( Y, x, x, x, x, x, x, x, x, X24_TYPELESS_G8_UINT)
+ SF( Y, Y, x, x, x, x, x, x, x, L16A16_UNORM)
+ SF( Y, 50, Y, x, x, x, x, x, x, I24X8_UNORM)
+ SF( Y, 50, Y, x, x, x, x, x, x, L24X8_UNORM)
+ SF( Y, 50, Y, x, x, x, x, x, x, A24X8_UNORM)
+ SF( Y, 50, Y, x, x, x, x, x, x, I32_FLOAT)
+ SF( Y, 50, Y, x, x, x, x, x, x, L32_FLOAT)
+ SF( Y, 50, Y, x, x, x, x, x, x, A32_FLOAT)
+ SF( Y, Y, x, Y, x, x, x, x, 60, B8G8R8X8_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, B8G8R8X8_UNORM_SRGB)
+ SF( Y, Y, x, x, x, x, x, x, x, R8G8B8X8_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB)
+ SF( Y, Y, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP)
+ SF( Y, Y, x, x, x, x, x, x, x, B10G10R10X2_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, L16A16_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, R32_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, R32_SNORM)
/* smpl filt shad CK RT AB VB SO color */
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R10G10B10X2_USCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_USCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_USCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_USCALED)
- SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G6R5_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB)
- SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5A1_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB)
- SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B4G4R4A4_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB)
- SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R8G8_UNORM)
- SF( Y, Y, x, Y, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8G8_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_UINT)
- SF( Y, Y, Y, x, Y, 45, Y, x, 70, BRW_SURFACEFORMAT_R16_UNORM)
- SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_UINT)
- SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16_FLOAT)
- SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE0)
- SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE1)
- SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I16_UNORM)
- SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16_UNORM)
- SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A16_UNORM)
- SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UNORM)
- SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I16_FLOAT)
- SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16_FLOAT)
- SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A16_FLOAT)
- SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UNORM_SRGB)
- SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM)
- SF( x, x, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5X1_UNORM)
- SF( x, x, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R10G10B10X2_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R8G8B8A8_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R8G8B8A8_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R16G16_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R16G16_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R32_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R32_USCALED)
+ SF( Y, Y, x, Y, Y, Y, x, x, x, B5G6R5_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, x, B5G6R5_UNORM_SRGB)
+ SF( Y, Y, x, Y, Y, Y, x, x, x, B5G5R5A1_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, x, B5G5R5A1_UNORM_SRGB)
+ SF( Y, Y, x, Y, Y, Y, x, x, x, B4G4R4A4_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, x, B4G4R4A4_UNORM_SRGB)
+ SF( Y, Y, x, x, Y, Y, Y, x, x, R8G8_UNORM)
+ SF( Y, Y, x, Y, Y, 60, Y, x, x, R8G8_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, R8G8_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, R8G8_UINT)
+ SF( Y, Y, Y, x, Y, 45, Y, x, 70, R16_UNORM)
+ SF( Y, Y, x, x, Y, 60, Y, x, x, R16_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, R16_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, R16_UINT)
+ SF( Y, Y, x, x, Y, Y, Y, x, x, R16_FLOAT)
+ SF(50, 50, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE0)
+ SF(50, 50, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE1)
+ SF( Y, Y, Y, x, x, x, x, x, x, I16_UNORM)
+ SF( Y, Y, Y, x, x, x, x, x, x, L16_UNORM)
+ SF( Y, Y, Y, x, x, x, x, x, x, A16_UNORM)
+ SF( Y, Y, x, Y, x, x, x, x, x, L8A8_UNORM)
+ SF( Y, Y, Y, x, x, x, x, x, x, I16_FLOAT)
+ SF( Y, Y, Y, x, x, x, x, x, x, L16_FLOAT)
+ SF( Y, Y, Y, x, x, x, x, x, x, A16_FLOAT)
+ SF(45, 45, x, x, x, x, x, x, x, L8A8_UNORM_SRGB)
+ SF( Y, Y, x, Y, x, x, x, x, x, R5G5_SNORM_B6_UNORM)
+ SF( x, x, x, x, Y, Y, x, x, x, B5G5R5X1_UNORM)
+ SF( x, x, x, x, Y, Y, x, x, x, B5G5R5X1_UNORM_SRGB)
+ SF( x, x, x, x, x, x, Y, x, x, R8G8_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R8G8_USCALED)
/* smpl filt shad CK RT AB VB SO color */
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_USCALED)
- SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE0)
- SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE1)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A1B5G5R5_UNORM)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4B4G4R4_UNORM)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UINT)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_SINT)
- SF( Y, Y, x, 45, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R8_UNORM)
- SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8_UINT)
- SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_A8_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_UNORM)
- SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P4A4_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_USCALED)
- SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE0)
- SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UNORM_SRGB)
- SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE1)
- SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P4A4_UNORM_PALETTE1)
- SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM_PALETTE1)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_Y8_SNORM)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UINT)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_SINT)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_UINT)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_SINT)
- SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_DXT1_RGB_SRGB)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R1_UINT)
- SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_NORMAL)
- SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPUVY)
- SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P2_UNORM_PALETTE0)
- SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P2_UNORM_PALETTE1)
- SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC1_UNORM)
- SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC2_UNORM)
- SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC3_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC4_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC5_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC1_UNORM_SRGB)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC2_UNORM_SRGB)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC3_UNORM_SRGB)
- SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_MONO8)
- SF( Y, Y, x, x, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPUV)
- SF( Y, Y, x, x, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPY)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_DXT1_RGB)
+ SF( x, x, x, x, x, x, Y, x, x, R16_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R16_USCALED)
+ SF(50, 50, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0)
+ SF(50, 50, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1)
+ SF( x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM)
+ SF( x, x, x, x, x, x, x, x, x, A4B4G4R4_UNORM)
+ SF( x, x, x, x, x, x, x, x, x, L8A8_UINT)
+ SF( x, x, x, x, x, x, x, x, x, L8A8_SINT)
+ SF( Y, Y, x, 45, Y, Y, Y, x, x, R8_UNORM)
+ SF( Y, Y, x, x, Y, 60, Y, x, x, R8_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, R8_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, R8_UINT)
+ SF( Y, Y, x, Y, Y, Y, x, x, x, A8_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, I8_UNORM)
+ SF( Y, Y, x, Y, x, x, x, x, x, L8_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, P4A4_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, A4P4_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, R8_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R8_USCALED)
+ SF(45, 45, x, x, x, x, x, x, x, P8_UNORM_PALETTE0)
+ SF(45, 45, x, x, x, x, x, x, x, L8_UNORM_SRGB)
+ SF(45, 45, x, x, x, x, x, x, x, P8_UNORM_PALETTE1)
+ SF(45, 45, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE1)
+ SF(45, 45, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE1)
+ SF( x, x, x, x, x, x, x, x, x, Y8_SNORM)
+ SF( x, x, x, x, x, x, x, x, x, L8_UINT)
+ SF( x, x, x, x, x, x, x, x, x, L8_SINT)
+ SF( x, x, x, x, x, x, x, x, x, I8_UINT)
+ SF( x, x, x, x, x, x, x, x, x, I8_SINT)
+ SF(45, 45, x, x, x, x, x, x, x, DXT1_RGB_SRGB)
+ SF( Y, Y, x, x, x, x, x, x, x, R1_UINT)
+ SF( Y, Y, x, Y, Y, x, x, x, 60, YCRCB_NORMAL)
+ SF( Y, Y, x, Y, Y, x, x, x, 60, YCRCB_SWAPUVY)
+ SF(45, 45, x, x, x, x, x, x, x, P2_UNORM_PALETTE0)
+ SF(45, 45, x, x, x, x, x, x, x, P2_UNORM_PALETTE1)
+ SF( Y, Y, x, Y, x, x, x, x, x, BC1_UNORM)
+ SF( Y, Y, x, Y, x, x, x, x, x, BC2_UNORM)
+ SF( Y, Y, x, Y, x, x, x, x, x, BC3_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, BC4_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, BC5_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, BC1_UNORM_SRGB)
+ SF( Y, Y, x, x, x, x, x, x, x, BC2_UNORM_SRGB)
+ SF( Y, Y, x, x, x, x, x, x, x, BC3_UNORM_SRGB)
+ SF( Y, x, x, x, x, x, x, x, x, MONO8)
+ SF( Y, Y, x, x, Y, x, x, x, 60, YCRCB_SWAPUV)
+ SF( Y, Y, x, x, Y, x, x, x, 60, YCRCB_SWAPY)
+ SF( Y, Y, x, x, x, x, x, x, x, DXT1_RGB)
/* smpl filt shad CK RT AB VB SO color */
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_FXT1)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_SNORM)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_USCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64B64A64_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64B64_FLOAT)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC4_SNORM)
- SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC5_SNORM)
- SF(50, 50, x, x, x, x, 60, x, x, BRW_SURFACEFORMAT_R16G16B16_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SNORM)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_USCALED)
- SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_SF16)
- SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM)
- SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM_SRGB)
- SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_UF16)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_PLANAR_420_8)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UNORM_SRGB)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC1_RGB8)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_R11)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_RG11)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_R11)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_RG11)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_UINT)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_SINT)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32_SFIXED)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SNORM)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_USCALED)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SSCALED)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SINT)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SNORM)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_USCALED)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SSCALED)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_UINT)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SINT)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64_PASSTHRU)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8_PTA)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8_PTA)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_RGBA8)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_SRGB8_A8)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UINT)
- SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_SINT)
+ SF( Y, Y, x, x, x, x, x, x, x, FXT1)
+ SF( x, x, x, x, x, x, Y, x, x, R8G8B8_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, R8G8B8_SNORM)
+ SF( x, x, x, x, x, x, Y, x, x, R8G8B8_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R8G8B8_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R64G64B64A64_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, R64G64B64_FLOAT)
+ SF( Y, Y, x, x, x, x, x, x, x, BC4_SNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, BC5_SNORM)
+ SF(50, 50, x, x, x, x, 60, x, x, R16G16B16_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, R16G16B16_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, R16G16B16_SNORM)
+ SF( x, x, x, x, x, x, Y, x, x, R16G16B16_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, R16G16B16_USCALED)
+ SF(70, 70, x, x, x, x, x, x, x, BC6H_SF16)
+ SF(70, 70, x, x, x, x, x, x, x, BC7_UNORM)
+ SF(70, 70, x, x, x, x, x, x, x, BC7_UNORM_SRGB)
+ SF(70, 70, x, x, x, x, x, x, x, BC6H_UF16)
+ SF( x, x, x, x, x, x, x, x, x, PLANAR_420_8)
+ SF( x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB)
+ SF( x, x, x, x, x, x, x, x, x, ETC1_RGB8)
+ SF( x, x, x, x, x, x, x, x, x, ETC2_RGB8)
+ SF( x, x, x, x, x, x, x, x, x, EAC_R11)
+ SF( x, x, x, x, x, x, x, x, x, EAC_RG11)
+ SF( x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11)
+ SF( x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11)
+ SF( x, x, x, x, x, x, x, x, x, ETC2_SRGB8)
+ SF( x, x, x, x, x, x, x, x, x, R16G16B16_UINT)
+ SF( x, x, x, x, x, x, x, x, x, R16G16B16_SINT)
+ SF( x, x, x, x, x, x, x, x, x, R32_SFIXED)
+ SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SNORM)
+ SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_USCALED)
+ SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SSCALED)
+ SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SINT)
+ SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SNORM)
+ SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_USCALED)
+ SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SSCALED)
+ SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_UINT)
+ SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SINT)
+ SF( x, x, x, x, x, x, x, x, x, R64G64B64A64_PASSTHRU)
+ SF( x, x, x, x, x, x, x, x, x, R64G64B64_PASSTHRU)
+ SF( x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA)
+ SF( x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA)
+ SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8)
+ SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8)
+ SF( x, x, x, x, x, x, x, x, x, R8G8B8_UINT)
+ SF( x, x, x, x, x, x, x, x, x, R8G8B8_SINT)
};
#undef x
#undef Y
+const char *
+brw_surface_format_name(unsigned format)
+{
+ return surface_formats[format].name;
+}
+
uint32_t
brw_format_for_mesa_format(mesa_format mesa_format)
{
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 72b02a2cf0a..998d8c42770 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -40,9 +40,88 @@
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
static unsigned int
+tr_mode_horizontal_texture_alignment(const struct brw_context *brw,
+ const struct intel_mipmap_tree *mt)
+{
+ const unsigned *align_yf, *align_ys;
+ const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8;
+ unsigned ret_align, divisor;
+
+ /* Horizontal alignment tables for TRMODE_{YF,YS}. Value in below
+ * tables specifies the horizontal alignment requirement in elements
+ * for the surface. An element is defined as a pixel in uncompressed
+ * surface formats, and as a compression block in compressed surface
+ * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an
+ * element is a sample.
+ */
+ const unsigned align_1d_yf[] = {4096, 2048, 1024, 512, 256};
+ const unsigned align_1d_ys[] = {65536, 32768, 16384, 8192, 4096};
+ const unsigned align_2d_yf[] = {64, 64, 32, 32, 16};
+ const unsigned align_2d_ys[] = {256, 256, 128, 128, 64};
+ const unsigned align_3d_yf[] = {16, 8, 8, 8, 4};
+ const unsigned align_3d_ys[] = {64, 32, 32, 32, 16};
+ int i = 0;
+
+ /* Alignment computations below assume bpp >= 8 and a power of 2. */
+ assert(bpp >= 8 && bpp <= 128 && is_power_of_two(bpp));
+
+ switch(mt->target) {
+ case GL_TEXTURE_1D:
+ case GL_TEXTURE_1D_ARRAY:
+ align_yf = align_1d_yf;
+ align_ys = align_1d_ys;
+ break;
+ case GL_TEXTURE_2D:
+ case GL_TEXTURE_RECTANGLE:
+ case GL_TEXTURE_2D_ARRAY:
+ case GL_TEXTURE_CUBE_MAP:
+ case GL_TEXTURE_CUBE_MAP_ARRAY:
+ case GL_TEXTURE_2D_MULTISAMPLE:
+ case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+ align_yf = align_2d_yf;
+ align_ys = align_2d_ys;
+ break;
+ case GL_TEXTURE_3D:
+ align_yf = align_3d_yf;
+ align_ys = align_3d_ys;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ /* Compute array index. */
+ i = ffs(bpp/8) - 1;
+
+ ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ?
+ align_yf[i] : align_ys[i];
+
+ assert(is_power_of_two(mt->num_samples));
+
+ switch (mt->num_samples) {
+ case 2:
+ case 4:
+ divisor = 2;
+ break;
+ case 8:
+ case 16:
+ divisor = 4;
+ break;
+ default:
+ divisor = 1;
+ break;
+ }
+ return ret_align / divisor;
+}
+
+
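Reading the tables above: the column index is ffs(bpp/8) - 1, so 8/16/32/64/128 bpp map to indices 0-4, and the looked-up value is then divided by 2 (2x/4x MSAA) or 4 (8x/16x MSAA). A standalone re-computation for a hypothetical 32 bpp, 4x-multisampled 2D TRMODE_YF surface:

    #include <stdio.h>
    #include <strings.h> /* ffs() */

    int main(void)
    {
       /* Horizontal alignment (in elements) for 2D TRMODE_YF surfaces,
        * indexed by log2(bpp / 8); copied from the table above.
        */
       static const unsigned align_2d_yf[] = { 64, 64, 32, 32, 16 };

       const unsigned bpp = 32;         /* e.g. an RGBA8 texel */
       const unsigned num_samples = 4;

       int i = ffs(bpp / 8) - 1;        /* 32 bpp -> index 2 */

       unsigned divisor = 1;
       if (num_samples == 2 || num_samples == 4)
          divisor = 2;
       else if (num_samples == 8 || num_samples == 16)
          divisor = 4;

       printf("halign = %u\n", align_2d_yf[i] / divisor); /* 16 */
       return 0;
    }
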
+static unsigned int
intel_horizontal_texture_alignment_unit(struct brw_context *brw,
- struct intel_mipmap_tree *mt)
+ struct intel_mipmap_tree *mt,
+ uint32_t layout_flags)
{
+ if (layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16)
+ return 16;
+
/**
* From the "Alignment Unit Size" section of various specs, namely:
* - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
@@ -88,18 +167,85 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw,
if (mt->format == MESA_FORMAT_S_UINT8)
return 8;
+ if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
+ uint32_t align = tr_mode_horizontal_texture_alignment(brw, mt);
+ /* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32. */
+ return align < 32 ? 32 : align;
+ }
+
if (brw->gen >= 7 && mt->format == MESA_FORMAT_Z_UNORM16)
return 8;
- if (brw->gen == 8 && mt->mcs_mt && mt->num_samples <= 1)
- return 16;
-
return 4;
}
static unsigned int
+tr_mode_vertical_texture_alignment(const struct brw_context *brw,
+ const struct intel_mipmap_tree *mt)
+{
+ const unsigned *align_yf, *align_ys;
+ const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8;
+ unsigned ret_align, divisor;
+
+ /* Vertical alignment tables for TRMODE_YF and TRMODE_YS. */
+ const unsigned align_2d_yf[] = {64, 32, 32, 16, 16};
+ const unsigned align_2d_ys[] = {256, 128, 128, 64, 64};
+ const unsigned align_3d_yf[] = {16, 16, 16, 8, 8};
+ const unsigned align_3d_ys[] = {32, 32, 32, 16, 16};
+ int i = 0;
+
+ assert(brw->gen >= 9 &&
+ mt->target != GL_TEXTURE_1D &&
+ mt->target != GL_TEXTURE_1D_ARRAY);
+
+ /* Alignment computations below assume bpp >= 8 and a power of 2. */
+ assert(bpp >= 8 && bpp <= 128 && is_power_of_two(bpp));
+
+ switch(mt->target) {
+ case GL_TEXTURE_2D:
+ case GL_TEXTURE_RECTANGLE:
+ case GL_TEXTURE_2D_ARRAY:
+ case GL_TEXTURE_CUBE_MAP:
+ case GL_TEXTURE_CUBE_MAP_ARRAY:
+ case GL_TEXTURE_2D_MULTISAMPLE:
+ case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+ align_yf = align_2d_yf;
+ align_ys = align_2d_ys;
+ break;
+ case GL_TEXTURE_3D:
+ align_yf = align_3d_yf;
+ align_ys = align_3d_ys;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ /* Compute array index. */
+ i = ffs(bpp / 8) - 1;
+
+ ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ?
+ align_yf[i] : align_ys[i];
+
+ assert(is_power_of_two(mt->num_samples));
+
+ switch (mt->num_samples) {
+ case 4:
+ case 8:
+ divisor = 2;
+ break;
+ case 16:
+ divisor = 4;
+ break;
+ default:
+ divisor = 1;
+ break;
+ }
+ return ret_align / divisor;
+}
+
+static unsigned int
intel_vertical_texture_alignment_unit(struct brw_context *brw,
- mesa_format format, bool multisampled)
+ const struct intel_mipmap_tree *mt)
{
/**
* From the "Alignment Unit Size" section of various specs, namely:
@@ -124,23 +270,29 @@ intel_vertical_texture_alignment_unit(struct brw_context *brw,
* Where "*" means either VALIGN_2 or VALIGN_4 depending on the setting of
* the SURFACE_STATE "Surface Vertical Alignment" field.
*/
- if (_mesa_is_format_compressed(format))
+ if (_mesa_is_format_compressed(mt->format))
/* See comment above for the horizontal alignment */
return brw->gen >= 9 ? 16 : 4;
- if (format == MESA_FORMAT_S_UINT8)
+ if (mt->format == MESA_FORMAT_S_UINT8)
return brw->gen >= 7 ? 8 : 4;
+ if (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
+ uint32_t align = tr_mode_vertical_texture_alignment(brw, mt);
+ /* XY_FAST_COPY_BLT doesn't support vertical alignment < 64 */
+ return align < 64 ? 64 : align;
+ }
+
/* Broadwell only supports VALIGN of 4, 8, and 16. The BSpec says 4
* should always be used, except for stencil buffers, which should be 8.
*/
if (brw->gen >= 8)
return 4;
- if (multisampled)
+ if (mt->num_samples > 1)
return 4;
- GLenum base_format = _mesa_get_format_base_format(format);
+ GLenum base_format = _mesa_get_format_base_format(mt->format);
if (brw->gen >= 6 &&
(base_format == GL_DEPTH_COMPONENT ||
@@ -161,7 +313,7 @@ intel_vertical_texture_alignment_unit(struct brw_context *brw,
*
* VALIGN_4 is not supported for surface format R32G32B32_FLOAT.
*/
- if (base_format == GL_YCBCR_MESA || format == MESA_FORMAT_RGB_FLOAT32)
+ if (base_format == GL_YCBCR_MESA || mt->format == MESA_FORMAT_RGB_FLOAT32)
return 2;
return 4;
@@ -348,9 +500,9 @@ align_cube(struct intel_mipmap_tree *mt)
mt->total_height += 2;
}
-static bool
-use_linear_1d_layout(struct brw_context *brw,
- struct intel_mipmap_tree *mt)
+bool
+gen9_use_linear_1d_layout(const struct brw_context *brw,
+ const struct intel_mipmap_tree *mt)
{
/* On Gen9+ the mipmap levels of a 1D surface are all laid out in a
* horizontal line. This isn't done for depth/stencil buffers however
@@ -375,7 +527,7 @@ brw_miptree_layout_texture_array(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
unsigned height = mt->physical_height0;
- bool layout_1d = use_linear_1d_layout(brw, mt);
+ bool layout_1d = gen9_use_linear_1d_layout(brw, mt);
int physical_qpitch;
if (layout_1d)
@@ -458,46 +610,111 @@ brw_miptree_layout_texture_3d(struct brw_context *brw,
align_cube(mt);
}
-void
-brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
+/**
+ * \brief Helper function for intel_miptree_create().
+ */
+static uint32_t
+brw_miptree_choose_tiling(struct brw_context *brw,
+ enum intel_miptree_tiling_mode requested,
+ const struct intel_mipmap_tree *mt)
{
- bool multisampled = mt->num_samples > 1;
- bool gen6_hiz_or_stencil = false;
+ if (mt->format == MESA_FORMAT_S_UINT8) {
+ /* The stencil buffer is W tiled. However, we request from the kernel a
+ * non-tiled buffer because the GTT is incapable of W fencing.
+ */
+ return I915_TILING_NONE;
+ }
- if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
- const GLenum base_format = _mesa_get_format_base_format(mt->format);
- gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format);
+ /* Some usages may want only one type of tiling, like depth miptrees (Y
+ * tiled), or temporary BOs for uploading data once (linear).
+ */
+ switch (requested) {
+ case INTEL_MIPTREE_TILING_ANY:
+ break;
+ case INTEL_MIPTREE_TILING_Y:
+ return I915_TILING_Y;
+ case INTEL_MIPTREE_TILING_NONE:
+ return I915_TILING_NONE;
}
- if (gen6_hiz_or_stencil) {
- /* On gen6, we use ALL_SLICES_AT_EACH_LOD for stencil/hiz because the
- * hardware doesn't support multiple mip levels on stencil/hiz.
+ if (mt->num_samples > 1) {
+ /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
+ * Surface"):
*
- * PRM Vol 2, Part 1, 7.5.3 Hierarchical Depth Buffer:
- * "The hierarchical depth buffer does not support the LOD field"
+ * [DevSNB+]: For multi-sample render targets, this field must be
+ * 1. MSRTs can only be tiled.
*
- * PRM Vol 2, Part 1, 7.5.4.1 Separate Stencil Buffer:
- * "The stencil depth buffer does not support the LOD field"
+ * Our usual reason for preferring X tiling (fast blits using the
+ * blitting engine) doesn't apply to MSAA, since we'll generally be
+ * downsampling or upsampling when blitting between the MSAA buffer
+ * and another buffer, and the blitting engine doesn't support that.
+ * So use Y tiling, since it makes better use of the cache.
*/
- if (mt->format == MESA_FORMAT_S_UINT8) {
- /* Stencil uses W tiling, so we force W tiling alignment for the
- * ALL_SLICES_AT_EACH_LOD miptree layout.
- */
- mt->align_w = 64;
- mt->align_h = 64;
- } else {
- /* Depth uses Y tiling, so we force need Y tiling alignment for the
- * ALL_SLICES_AT_EACH_LOD miptree layout.
- */
- mt->align_w = 128 / mt->cpp;
- mt->align_h = 32;
- }
- } else {
- mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt);
- mt->align_h =
- intel_vertical_texture_alignment_unit(brw, mt->format, multisampled);
+ return I915_TILING_Y;
+ }
+
+ GLenum base_format = _mesa_get_format_base_format(mt->format);
+ if (base_format == GL_DEPTH_COMPONENT ||
+ base_format == GL_DEPTH_STENCIL_EXT)
+ return I915_TILING_Y;
+
+ /* 1D textures (and 1D array textures) don't get any benefit from tiling,
+ * in fact it leads to a less efficient use of memory space and bandwidth
+ * due to tile alignment.
+ */
+ if (mt->logical_height0 == 1)
+ return I915_TILING_NONE;
+
+ int minimum_pitch = mt->total_width * mt->cpp;
+
+ /* If the width is much smaller than a tile, don't bother tiling. */
+ if (minimum_pitch < 64)
+ return I915_TILING_NONE;
+
+ if (ALIGN(minimum_pitch, 512) >= 32768 ||
+ mt->total_width >= 32768 || mt->total_height >= 32768) {
+ perf_debug("%dx%d miptree too large to blit, falling back to untiled",
+ mt->total_width, mt->total_height);
+ return I915_TILING_NONE;
+ }
+
+ /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */
+ if (brw->gen < 6)
+ return I915_TILING_X;
+
+ /* From the Sandybridge PRM, Volume 1, Part 2, page 32:
+ * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX
+ * or Linear."
+ * 128 bits per pixel translates to 16 bytes per pixel. This restriction is
+ * necessary all the way back to 965, but Y tiling is permitted on Gen7+.
+ */
+ if (brw->gen < 7 && mt->cpp >= 16)
+ return I915_TILING_X;
+
+ /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most
+ * messages), on p64, under the heading "Surface Vertical Alignment":
+ *
+ * This field must be set to VALIGN_4 for all tiled Y Render Target
+ * surfaces.
+ *
+ * So if the surface is renderable and uses a vertical alignment of 2,
+ * force it to be X tiled. This is somewhat conservative (it's possible
+ * that the client won't ever render to this surface), but it's difficult
+ * to know that ahead of time. And besides, since we use a vertical
+ * alignment of 4 as often as we can, this shouldn't happen very often.
+ */
+ if (brw->gen == 7 && mt->align_h == 2 &&
+ brw->format_supported_as_render_target[mt->format]) {
+ return I915_TILING_X;
}
+ return I915_TILING_Y | I915_TILING_X;
+}
+
+static void
+intel_miptree_set_total_width_height(struct brw_context *brw,
+ struct intel_mipmap_tree *mt)
+{
switch (mt->target) {
case GL_TEXTURE_CUBE_MAP:
if (brw->gen == 4) {
@@ -532,7 +749,7 @@ brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
break;
case INTEL_MSAA_LAYOUT_NONE:
case INTEL_MSAA_LAYOUT_IMS:
- if (use_linear_1d_layout(brw, mt))
+ if (gen9_use_linear_1d_layout(brw, mt))
gen9_miptree_layout_1d(mt);
else
brw_miptree_layout_2d(mt);
@@ -540,8 +757,62 @@ brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
}
break;
}
+
DBG("%s: %dx%dx%d\n", __func__,
mt->total_width, mt->total_height, mt->cpp);
+}
+
+void
+brw_miptree_layout(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ enum intel_miptree_tiling_mode requested,
+ uint32_t layout_flags)
+{
+ bool gen6_hiz_or_stencil = false;
+
+ mt->tr_mode = INTEL_MIPTREE_TRMODE_NONE;
+
+ if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
+ const GLenum base_format = _mesa_get_format_base_format(mt->format);
+ gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format);
+ }
+
+ if (gen6_hiz_or_stencil) {
+ /* On gen6, we use ALL_SLICES_AT_EACH_LOD for stencil/hiz because the
+ * hardware doesn't support multiple mip levels on stencil/hiz.
+ *
+ * PRM Vol 2, Part 1, 7.5.3 Hierarchical Depth Buffer:
+ * "The hierarchical depth buffer does not support the LOD field"
+ *
+ * PRM Vol 2, Part 1, 7.5.4.1 Separate Stencil Buffer:
+ * "The stencil depth buffer does not support the LOD field"
+ */
+ if (mt->format == MESA_FORMAT_S_UINT8) {
+ /* Stencil uses W tiling, so we force W tiling alignment for the
+ * ALL_SLICES_AT_EACH_LOD miptree layout.
+ */
+ mt->align_w = 64;
+ mt->align_h = 64;
+ assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
+ } else {
+ /* Depth uses Y tiling, so we need Y tiling alignment for the
+ * ALL_SLICES_AT_EACH_LOD miptree layout.
+ */
+ mt->align_w = 128 / mt->cpp;
+ mt->align_h = 32;
+ }
+ } else {
+ mt->align_w =
+ intel_horizontal_texture_alignment_unit(brw, mt, layout_flags);
+ mt->align_h = intel_vertical_texture_alignment_unit(brw, mt);
+ }
+
+ intel_miptree_set_total_width_height(brw, mt);
+
+ if (!mt->total_width || !mt->total_height) {
+ intel_miptree_release(&mt);
+ return;
+ }
/* On Gen9+ the alignment values are expressed in multiples of the block
* size
@@ -552,5 +823,8 @@ brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
mt->align_w /= i;
mt->align_h /= j;
}
+
+ if ((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0)
+ mt->tiling = brw_miptree_choose_tiling(brw, requested, mt);
}
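
Editorial note, not part of the patch: the size checks in brw_miptree_choose_tiling() can be illustrated with a small stand-alone sketch. The EX_ALIGN macro and the ex_ prefix are editorial inventions that stand in for the driver's ALIGN macro.

#include <stdbool.h>
#include <stdint.h>

#define EX_ALIGN(value, alignment) (((value) + (alignment) - 1) & ~((alignment) - 1))

/* Mirrors the pitch/size checks above: tiny surfaces stay linear, and
 * surfaces too large for the blitter fall back to untiled as well. */
static bool
ex_worth_tiling(uint32_t total_width, uint32_t total_height, uint32_t cpp)
{
   uint32_t minimum_pitch = total_width * cpp;

   if (minimum_pitch < 64)          /* narrower than a tile row, e.g. 8x8 RGBA8 */
      return false;

   if (EX_ALIGN(minimum_pitch, 512) >= 32768 ||
       total_width >= 32768 || total_height >= 32768)
      return false;                 /* e.g. 16384-wide RGBA32F: 16384 * 16 = 262144 */

   return true;                     /* e.g. 1024x1024 RGBA8 gets tiled */
}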
diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h
index b548d234538..04e4e944118 100644
--- a/src/mesa/drivers/dri/i965/brw_util.h
+++ b/src/mesa/drivers/dri/i965/brw_util.h
@@ -35,9 +35,47 @@
#include "main/mtypes.h"
#include "main/imports.h"
+#include "brw_context.h"
extern GLuint brw_translate_blend_factor( GLenum factor );
extern GLuint brw_translate_blend_equation( GLenum mode );
extern GLenum brw_fix_xRGB_alpha(GLenum function);
+static inline uint32_t
+brw_get_line_width(struct brw_context *brw)
+{
+ /* From the OpenGL 4.4 spec:
+ *
+ * "The actual width of non-antialiased lines is determined by rounding
+ * the supplied width to the nearest integer, then clamping it to the
+ * implementation-dependent maximum non-antialiased line width."
+ */
+ float line_width =
+ CLAMP(!brw->ctx.Multisample._Enabled && !brw->ctx.Line.SmoothFlag
+ ? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width,
+ 0.0, brw->ctx.Const.MaxLineWidth);
+ uint32_t line_width_u3_7 = U_FIXED(line_width, 7);
+
+ /* Line width of 0 is not allowed when MSAA enabled */
+ if (brw->ctx.Multisample._Enabled) {
+ if (line_width_u3_7 == 0)
+ line_width_u3_7 = 1;
+ } else if (brw->ctx.Line.SmoothFlag && line_width < 1.5) {
+ /* For 1 pixel line thickness or less, the general
+ * anti-aliasing algorithm gives up, and a garbage line is
+ * generated. Setting a Line Width of 0.0 specifies the
+ * rasterization of the "thinnest" (one-pixel-wide),
+ * non-antialiased lines.
+ *
+ * Lines rendered with zero Line Width are rasterized using
+ * Grid Intersection Quantization rules as specified by
+ * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line
+ * Rasterization.
+ */
+ line_width_u3_7 = 0;
+ }
+
+ return line_width_u3_7;
+}
+
#endif
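
A note on the fixed-point conversion in brw_get_line_width() above: the hardware field is unsigned 3.7 fixed point, so (assuming U_FIXED(v, 7) scales v by 2^7) a rounded width of 2.0 px becomes 256, and the smooth-line special case deliberately writes 0 to request GIQ "cosmetic" line rasterization. A minimal editorial sketch of the same conversion, not driver code:

#include <stdint.h>

static uint32_t
ex_line_width_to_u3_7(float width_px)
{
   return (uint32_t)(width_px * 128.0f);   /* 2.0 px -> 256, 0.5 px -> 64 */
}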
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 2841d983ad5..a5c686ceaaf 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -35,6 +35,7 @@ extern "C" {
#include "program/prog_print.h"
#include "program/prog_parameter.h"
}
+#include "main/context.h"
#define MAX_INSTRUCTION (1 << 30)
@@ -1676,20 +1677,16 @@ vec4_visitor::emit_shader_time_end()
*/
emit(ADD(diff, src_reg(diff), src_reg(-2u)));
- emit_shader_time_write(st_base, src_reg(diff));
- emit_shader_time_write(st_written, src_reg(1u));
+ emit_shader_time_write(0, src_reg(diff));
+ emit_shader_time_write(1, src_reg(1u));
emit(BRW_OPCODE_ELSE);
- emit_shader_time_write(st_reset, src_reg(1u));
+ emit_shader_time_write(2, src_reg(1u));
emit(BRW_OPCODE_ENDIF);
}
void
-vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
- src_reg value)
+vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
{
- int shader_time_index =
- brw_get_shader_time_index(brw, shader_prog, prog, type);
-
dst_reg dst =
dst_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 2));
@@ -1698,7 +1695,8 @@ vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
time.reg_offset++;
offset.type = BRW_REGISTER_TYPE_UD;
- emit(MOV(offset, src_reg(shader_time_index * SHADER_TIME_STRIDE)));
+ int index = shader_time_index * 3 + shader_time_subindex;
+ emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE)));
time.type = BRW_REGISTER_TYPE_UD;
emit(MOV(time, src_reg(value)));
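
As a worked example of the new offset computation: each stage now owns three consecutive shader-time records (subindex 0 for the accumulated time, 1 for the "written" flag, 2 for the "reset" flag, matching the three emit_shader_time_write() calls above), so a stage with shader_time_index == 2 writes records 6, 7 and 8, i.e. byte offsets 6, 7 and 8 times SHADER_TIME_STRIDE.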
@@ -1709,11 +1707,11 @@ vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
}
bool
-vec4_visitor::run()
+vec4_visitor::run(gl_clip_plane *clip_planes)
{
sanity_param_count = prog->Parameters->NumParameters;
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ if (shader_time_index >= 0)
emit_shader_time_begin();
assign_binding_table_offsets();
@@ -1731,7 +1729,7 @@ vec4_visitor::run()
base_ir = NULL;
if (key->userclip_active && !prog->UsesClipDistanceOut)
- setup_uniform_clipplane_values();
+ setup_uniform_clipplane_values(clip_planes);
emit_thread_end();
@@ -1768,7 +1766,7 @@ vec4_visitor::run()
snprintf(filename, 64, "%s-%04d-%02d-%02d-" #pass, \
stage_abbrev, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
\
- backend_visitor::dump_instructions(filename); \
+ backend_shader::dump_instructions(filename); \
} \
\
progress = progress || this_progress; \
@@ -1781,7 +1779,7 @@ vec4_visitor::run()
snprintf(filename, 64, "%s-%04d-00-start",
stage_abbrev, shader_prog ? shader_prog->Name : 0);
- backend_visitor::dump_instructions(filename);
+ backend_shader::dump_instructions(filename);
}
bool progress;
@@ -1868,8 +1866,6 @@ brw_vs_emit(struct brw_context *brw,
bool start_busy = false;
double start_time = 0;
const unsigned *assembly = NULL;
- bool use_nir =
- brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions != NULL;
if (unlikely(brw->perf_debug)) {
start_busy = (brw->batch.last_bo &&
@@ -1881,22 +1877,33 @@ brw_vs_emit(struct brw_context *brw,
if (prog)
shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
+ int st_index = -1;
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ st_index = brw_get_shader_time_index(brw, prog, &c->vp->program.Base,
+ ST_VS);
+
if (unlikely(INTEL_DEBUG & DEBUG_VS))
brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base);
- if (use_nir && !c->vp->program.Base.nir) {
- /* Normally we generate NIR in LinkShader() or ProgramStringNotify(), but
- * Mesa's fixed-function vertex program handling doesn't notify the driver
- * at all. Just do it here, at the last minute, even though it's lame.
- */
- assert(c->vp->program.Base.Id == 0 && prog == NULL);
- c->vp->program.Base.nir =
- brw_create_nir(brw, NULL, &c->vp->program.Base, MESA_SHADER_VERTEX);
- }
+ if (brw->intelScreen->compiler->scalar_vs) {
+ if (!c->vp->program.Base.nir) {
+ /* Normally we generate NIR in LinkShader() or
+ * ProgramStringNotify(), but Mesa's fixed-function vertex program
+ * handling doesn't notify the driver at all. Just do it here, at
+ * the last minute, even though it's lame.
+ */
+ assert(c->vp->program.Base.Id == 0 && prog == NULL);
+ c->vp->program.Base.nir =
+ brw_create_nir(brw, NULL, &c->vp->program.Base, MESA_SHADER_VERTEX);
+ }
- if (brw->scalar_vs && (prog || use_nir)) {
- fs_visitor v(brw, mem_ctx, &c->key, prog_data, prog, &c->vp->program, 8);
- if (!v.run_vs()) {
+ prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
+
+ fs_visitor v(brw->intelScreen->compiler, brw,
+ mem_ctx, MESA_SHADER_VERTEX, &c->key,
+ &prog_data->base.base, prog, &c->vp->program.Base,
+ 8, st_index);
+ if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) {
if (prog) {
prog->LinkStatus = false;
ralloc_strcat(&prog->InfoLog, v.fail_msg);
@@ -1908,7 +1915,8 @@ brw_vs_emit(struct brw_context *brw,
return NULL;
}
- fs_generator g(brw, mem_ctx, (void *) &c->key, &prog_data->base.base,
+ fs_generator g(brw->intelScreen->compiler, brw,
+ mem_ctx, (void *) &c->key, &prog_data->base.base,
&c->vp->program.Base, v.promoted_constants,
v.runtime_check_aads_emit, "VS");
if (INTEL_DEBUG & DEBUG_VS) {
@@ -1926,13 +1934,16 @@ brw_vs_emit(struct brw_context *brw,
g.generate_code(v.cfg, 8);
assembly = g.get_assembly(final_assembly_size);
- prog_data->base.simd8 = true;
c->base.last_scratch = v.last_scratch;
}
if (!assembly) {
- vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx);
- if (!v.run()) {
+ prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
+
+ vec4_vs_visitor v(brw->intelScreen->compiler,
+ c, prog_data, prog, mem_ctx, st_index,
+ !_mesa_is_gles3(&brw->ctx));
+ if (!v.run(brw_select_clip_planes(&brw->ctx))) {
if (prog) {
prog->LinkStatus = false;
ralloc_strcat(&prog->InfoLog, v.fail_msg);
@@ -1944,7 +1955,8 @@ brw_vs_emit(struct brw_context *brw,
return NULL;
}
- vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base,
+ vec4_generator g(brw->intelScreen->compiler, brw,
+ prog, &c->vp->program.Base, &prog_data->base,
mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS");
assembly = g.generate_assembly(v.cfg, final_assembly_size);
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 628c6313cc9..2ac16932189 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -73,10 +73,10 @@ class vec4_live_variables;
* Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
* fixed-function) into VS IR.
*/
-class vec4_visitor : public backend_visitor
+class vec4_visitor : public backend_shader, public ir_visitor
{
public:
- vec4_visitor(struct brw_context *brw,
+ vec4_visitor(const struct brw_compiler *compiler,
struct brw_vec4_compile *c,
struct gl_program *prog,
const struct brw_vue_prog_key *key,
@@ -85,9 +85,7 @@ public:
gl_shader_stage stage,
void *mem_ctx,
bool no_spills,
- shader_time_shader_type st_base,
- shader_time_shader_type st_written,
- shader_time_shader_type st_reset);
+ int shader_time_index);
~vec4_visitor();
dst_reg dst_null_f()
@@ -160,6 +158,7 @@ public:
virtual void visit(ir_if *);
virtual void visit(ir_emit_vertex *);
virtual void visit(ir_end_primitive *);
+ virtual void visit(ir_barrier *);
/*@}*/
src_reg result;
@@ -178,10 +177,10 @@ public:
struct hash_table *variable_ht;
- bool run(void);
+ bool run(gl_clip_plane *clip_planes);
void fail(const char *msg, ...);
- void setup_uniform_clipplane_values();
+ void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
void setup_uniform_values(ir_variable *ir);
void setup_builtin_uniform_values(ir_variable *ir);
int setup_uniforms(int payload_reg);
@@ -344,8 +343,7 @@ public:
void emit_shader_time_begin();
void emit_shader_time_end();
- void emit_shader_time_write(enum shader_time_shader_type type,
- src_reg value);
+ void emit_shader_time_write(int shader_time_subindex, src_reg value);
void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
dst_reg dst, src_reg offset, src_reg src0,
@@ -412,9 +410,7 @@ private:
*/
const bool no_spills;
- const shader_time_shader_type st_base;
- const shader_time_shader_type st_written;
- const shader_time_shader_type st_reset;
+ int shader_time_index;
};
@@ -426,7 +422,7 @@ private:
class vec4_generator
{
public:
- vec4_generator(struct brw_context *brw,
+ vec4_generator(const struct brw_compiler *compiler, void *log_data,
struct gl_shader_program *shader_prog,
struct gl_program *prog,
struct brw_vue_prog_data *prog_data,
@@ -508,7 +504,9 @@ private:
struct brw_reg dst);
void generate_unpack_flags(struct brw_reg dst);
- struct brw_context *brw;
+ const struct brw_compiler *compiler;
+ void *log_data; /* Passed to compiler->*_log functions */
+
const struct brw_device_info *devinfo;
struct brw_codegen *p;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index 9147c3cbb79..c9fe0cebf27 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -114,8 +114,16 @@ instructions_match(vec4_instruction *a, vec4_instruction *b)
{
return a->opcode == b->opcode &&
a->saturate == b->saturate &&
+ a->predicate == b->predicate &&
+ a->predicate_inverse == b->predicate_inverse &&
a->conditional_mod == b->conditional_mod &&
+ a->flag_subreg == b->flag_subreg &&
a->dst.type == b->dst.type &&
+ a->offset == b->offset &&
+ a->mlen == b->mlen &&
+ a->base_mrf == b->base_mrf &&
+ a->header_size == b->header_size &&
+ a->shadow_compare == b->shadow_compare &&
a->dst.writemask == b->dst.writemask &&
a->force_writemask_all == b->force_writemask_all &&
a->regs_written == b->regs_written &&
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index ef77b8df051..d2de2f0be25 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -134,7 +134,8 @@ vec4_instruction::get_src(const struct brw_vue_prog_data *prog_data, int i)
return brw_reg;
}
-vec4_generator::vec4_generator(struct brw_context *brw,
+vec4_generator::vec4_generator(const struct brw_compiler *compiler,
+ void *log_data,
struct gl_shader_program *shader_prog,
struct gl_program *prog,
struct brw_vue_prog_data *prog_data,
@@ -142,13 +143,13 @@ vec4_generator::vec4_generator(struct brw_context *brw,
bool debug_flag,
const char *stage_name,
const char *stage_abbrev)
- : brw(brw), devinfo(brw->intelScreen->devinfo),
+ : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo),
shader_prog(shader_prog), prog(prog), prog_data(prog_data),
mem_ctx(mem_ctx), stage_name(stage_name), stage_abbrev(stage_abbrev),
debug_flag(debug_flag)
{
p = rzalloc(mem_ctx, struct brw_codegen);
- brw_init_codegen(brw->intelScreen->devinfo, p, mem_ctx);
+ brw_init_codegen(devinfo, p, mem_ctx);
}
vec4_generator::~vec4_generator()
@@ -398,30 +399,25 @@ vec4_generator::generate_tex(vec4_instruction *inst,
brw_mark_surface_used(&prog_data->base, sampler + base_binding_table_index);
} else {
/* Non-constant sampler index. */
- /* Note: this clobbers `dst` as a temporary before emitting the send */
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
- struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));
-
struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_access_mode(p, BRW_ALIGN_1);
- /* Some care required: `sampler` and `temp` may alias:
- * addr = sampler & 0xff
- * temp = (sampler << 8) & 0xf00
- * addr = addr | temp
- */
- brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index));
- brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
- brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
- brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
- brw_OR(p, addr, addr, temp);
+ /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */
+ brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101));
+ if (base_binding_table_index)
+ brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
+ brw_AND(p, addr, addr, brw_imm_ud(0xfff));
brw_pop_insn_state(p);
+ if (inst->base_mrf != -1)
+ gen6_resolve_implied_move(p, &src, inst->base_mrf);
+
/* dst = send(offset, a0.0 | <descriptor>) */
brw_inst *insn = brw_send_indirect_message(
p, BRW_SFID_SAMPLER, dst, src, addr);
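
The multiply by 0x101 is worth unpacking: for a dynamic sampler index s, s * 0x101 equals s | (s << 8), which drops s into both the binding-table-index byte (bits 7:0) and the sampler-index field (bits 11:8) of the message descriptor in one instruction, replacing the SHL/AND/OR sequence removed above. A hypothetical helper (editorial, not part of the patch) showing the same arithmetic on the CPU:

#include <stdint.h>

static uint32_t
ex_sampler_descriptor_low_bits(uint32_t sampler, uint32_t base_binding_table_index)
{
   /* e.g. sampler = 3, base 0 -> 0x303; sampler = 10, base 4 -> 0xa0e */
   return ((sampler * 0x101) + base_binding_table_index) & 0xfff;
}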
@@ -1631,16 +1627,11 @@ vec4_generator::generate_code(const cfg_t *cfg)
ralloc_free(annotation.ann);
}
- static GLuint msg_id = 0;
- _mesa_gl_debug(&brw->ctx, &msg_id,
- MESA_DEBUG_SOURCE_SHADER_COMPILER,
- MESA_DEBUG_TYPE_OTHER,
- MESA_DEBUG_SEVERITY_NOTIFICATION,
- "%s vec4 shader: %d inst, %d loops, "
- "compacted %d to %d bytes.\n",
- stage_abbrev,
- before_size / 16, loop_count,
- before_size, after_size);
+ compiler->shader_debug_log(log_data,
+ "%s vec4 shader: %d inst, %d loops, "
+ "compacted %d to %d bytes.\n",
+ stage_abbrev, before_size / 16, loop_count,
+ before_size, after_size);
}
const unsigned *
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 363e30e34e4..69bcf5afc51 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -34,15 +34,15 @@ const unsigned MAX_GS_INPUT_VERTICES = 6;
namespace brw {
-vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw,
+vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
struct brw_gs_compile *c,
struct gl_shader_program *prog,
void *mem_ctx,
- bool no_spills)
- : vec4_visitor(brw, &c->base, &c->gp->program.Base, &c->key.base,
+ bool no_spills,
+ int shader_time_index)
+ : vec4_visitor(compiler, &c->base, &c->gp->program.Base, &c->key.base,
&c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx,
- no_spills,
- ST_GS, ST_GS_WRITTEN, ST_GS_RESET),
+ no_spills, shader_time_index),
c(c)
{
}
@@ -106,7 +106,7 @@ vec4_gs_visitor::setup_payload()
* to be interleaved, so one register contains two attribute slots.
*/
int attributes_per_reg =
- c->prog_data.dispatch_mode == GEN7_GS_DISPATCH_MODE_DUAL_OBJECT ? 1 : 2;
+ c->prog_data.base.dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
/* If a geometry shader tries to read from an input that wasn't written by
* the vertex shader, that produces undefined results, but it shouldn't
@@ -629,7 +629,8 @@ generate_assembly(struct brw_context *brw,
const cfg_t *cfg,
unsigned *final_assembly_size)
{
- vec4_generator g(brw, shader_prog, prog, prog_data, mem_ctx,
+ vec4_generator g(brw->intelScreen->compiler, brw,
+ shader_prog, prog, prog_data, mem_ctx,
INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
return g.generate_assembly(cfg, final_assembly_size);
}
@@ -648,6 +649,10 @@ brw_gs_emit(struct brw_context *brw,
brw_dump_ir("geometry", prog, &shader->base, NULL);
}
+ int st_index = -1;
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ st_index = brw_get_shader_time_index(brw, prog, NULL, ST_GS);
+
if (brw->gen >= 7) {
/* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do
* so without spilling. If the GS invocations count > 1, then we can't use
@@ -655,10 +660,11 @@ brw_gs_emit(struct brw_context *brw,
*/
if (c->prog_data.invocations <= 1 &&
likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) {
- c->prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_DUAL_OBJECT;
+ c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
- vec4_gs_visitor v(brw, c, prog, mem_ctx, true /* no_spills */);
- if (v.run()) {
+ vec4_gs_visitor v(brw->intelScreen->compiler,
+ c, prog, mem_ctx, true /* no_spills */, st_index);
+ if (v.run(NULL /* clip planes */)) {
return generate_assembly(brw, prog, &c->gp->program.Base,
&c->prog_data.base, mem_ctx, v.cfg,
final_assembly_size);
@@ -690,19 +696,23 @@ brw_gs_emit(struct brw_context *brw,
* SINGLE mode.
*/
if (c->prog_data.invocations <= 1 || brw->gen < 7)
- c->prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_SINGLE;
+ c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE;
else
- c->prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE;
+ c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE;
vec4_gs_visitor *gs = NULL;
const unsigned *ret = NULL;
if (brw->gen >= 7)
- gs = new vec4_gs_visitor(brw, c, prog, mem_ctx, false /* no_spills */);
+ gs = new vec4_gs_visitor(brw->intelScreen->compiler,
+ c, prog, mem_ctx, false /* no_spills */,
+ st_index);
else
- gs = new gen6_gs_visitor(brw, c, prog, mem_ctx, false /* no_spills */);
+ gs = new gen6_gs_visitor(brw->intelScreen->compiler,
+ c, prog, mem_ctx, false /* no_spills */,
+ st_index);
- if (!gs->run()) {
+ if (!gs->run(NULL /* clip planes */)) {
prog->LinkStatus = false;
ralloc_strcat(&prog->InfoLog, gs->fail_msg);
} else {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
index bcb5a2bcfc1..e693c56b58f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
@@ -68,11 +68,12 @@ namespace brw {
class vec4_gs_visitor : public vec4_visitor
{
public:
- vec4_gs_visitor(struct brw_context *brw,
+ vec4_gs_visitor(const struct brw_compiler *compiler,
struct brw_gs_compile *c,
struct gl_shader_program *prog,
void *mem_ctx,
- bool no_spills);
+ bool no_spills,
+ int shader_time_index);
protected:
virtual dst_reg *make_reg_for_system_value(ir_variable *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 5368a75bc0f..555c42e2f24 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -191,7 +191,6 @@ vec4_visitor::setup_payload_interference(struct ra_graph *g,
bool
vec4_visitor::reg_allocate()
{
- struct brw_compiler *compiler = brw->intelScreen->compiler;
unsigned int hw_reg_mapping[alloc.count];
int payload_reg_count = this->first_non_payload_grf;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index e51c140c0f2..236fa51f92c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -684,9 +684,12 @@ vec4_visitor::setup_uniform_values(ir_variable *ir)
* order we'd walk the type, so walk the list of storage and find anything
* with our name, or the prefix of a component that starts with our name.
*/
- for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
+ for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
+ if (storage->builtin)
+ continue;
+
if (strncmp(ir->name, storage->name, namelen) != 0 ||
(storage->name[namelen] != 0 &&
storage->name[namelen] != '.' &&
@@ -718,10 +721,8 @@ vec4_visitor::setup_uniform_values(ir_variable *ir)
}
void
-vec4_visitor::setup_uniform_clipplane_values()
+vec4_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
{
- gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
-
for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
assert(this->uniforms < uniform_array_size);
this->uniform_vector_size[this->uniforms] = 4;
@@ -2461,11 +2462,27 @@ vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler
new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
dst_reg(this, glsl_type::uvec4_type));
inst->base_mrf = 2;
- inst->mlen = 1;
inst->src[1] = sampler;
+ int param_base;
+
+ if (devinfo->gen >= 9) {
+ /* Gen9+ needs a message header in order to use SIMD4x2 mode */
+ vec4_instruction *header_inst = new(mem_ctx)
+ vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
+ dst_reg(MRF, inst->base_mrf));
+
+ emit(header_inst);
+
+ inst->mlen = 2;
+ inst->header_size = 1;
+ param_base = inst->base_mrf + 1;
+ } else {
+ inst->mlen = 1;
+ param_base = inst->base_mrf;
+ }
+
/* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */
- int param_base = inst->base_mrf;
int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1;
int zero_mask = 0xf & ~coord_mask;
@@ -2949,6 +2966,12 @@ vec4_visitor::visit(ir_end_primitive *)
}
void
+vec4_visitor::visit(ir_barrier *)
+{
+ unreachable("not reached");
+}
+
+void
vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
dst_reg dst, src_reg offset,
src_reg src0, src_reg src1)
@@ -3655,7 +3678,7 @@ vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg)
*reg = neg_result;
}
-vec4_visitor::vec4_visitor(struct brw_context *brw,
+vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
struct brw_vec4_compile *c,
struct gl_program *prog,
const struct brw_vue_prog_key *key,
@@ -3664,10 +3687,9 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
gl_shader_stage stage,
void *mem_ctx,
bool no_spills,
- shader_time_shader_type st_base,
- shader_time_shader_type st_written,
- shader_time_shader_type st_reset)
- : backend_visitor(brw, shader_prog, prog, &prog_data->base, stage),
+ int shader_time_index)
+ : backend_shader(compiler, NULL, mem_ctx,
+ shader_prog, prog, &prog_data->base, stage),
c(c),
key(key),
prog_data(prog_data),
@@ -3676,11 +3698,8 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
first_non_payload_grf(0),
need_all_constants_in_pull_buffer(false),
no_spills(no_spills),
- st_base(st_base),
- st_written(st_written),
- st_reset(st_reset)
+ shader_time_index(shader_time_index)
{
- this->mem_ctx = mem_ctx;
this->failed = false;
this->base_ir = NULL;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
index 92d108598a2..dcbd2405078 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
@@ -381,8 +381,7 @@ vec4_vs_visitor::emit_program_code()
break;
default:
- _mesa_problem(ctx, "Unsupported opcode %s in vertex program\n",
- _mesa_opcode_string(vpi->Opcode));
+ assert(!"Unsupported opcode in vertex program");
}
/* Copy the temporary back into the actual destination register. */
@@ -574,15 +573,13 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
break;
default:
- _mesa_problem(ctx, "bad uniform src register file: %s\n",
- _mesa_register_file_name((gl_register_file)src.File));
+ assert(!"Bad uniform in src register file");
return src_reg(this, glsl_type::vec4_type);
}
break;
default:
- _mesa_problem(ctx, "bad src register file: %s\n",
- _mesa_register_file_name((gl_register_file)src.File));
+ assert(!"Bad src register file");
return src_reg(this, glsl_type::vec4_type);
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
index 4baf73ebde1..f93062b46d0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -23,7 +23,6 @@
#include "brw_vs.h"
-#include "main/context.h"
namespace brw {
@@ -78,7 +77,7 @@ vec4_vs_visitor::emit_prolog()
/* ES 3.0 has different rules for converting signed normalized
* fixed-point numbers than desktop GL.
*/
- if (_mesa_is_gles3(ctx) && (wa_flags & BRW_ATTRIB_WA_SIGN)) {
+ if ((wa_flags & BRW_ATTRIB_WA_SIGN) && !use_legacy_snorm_formula) {
/* According to equation 2.2 of the ES 3.0 specification,
* signed normalization conversion is done by:
*
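
The comment above is cut off by the hunk boundary; for reference, the two conversions that use_legacy_snorm_formula now selects between are, for a b-bit signed normalized component c: ES 3.0 (equation 2.2) uses f = max(c / (2^(b-1) - 1), -1.0), while the legacy desktop GL rule is f = (2c + 1) / (2^b - 1). An editorial sketch of both, not driver code:

static float
ex_snorm_to_float_es3(int c, int b)
{
   float f = (float)c / (float)((1 << (b - 1)) - 1);
   return f < -1.0f ? -1.0f : f;
}

static float
ex_snorm_to_float_legacy(int c, int b)
{
   return (2.0f * (float)c + 1.0f) / (float)((1 << b) - 1);
}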
@@ -212,18 +211,21 @@ vec4_vs_visitor::emit_thread_end()
}
-vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw,
+vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
struct brw_vs_compile *vs_compile,
struct brw_vs_prog_data *vs_prog_data,
struct gl_shader_program *prog,
- void *mem_ctx)
- : vec4_visitor(brw, &vs_compile->base, &vs_compile->vp->program.Base,
+ void *mem_ctx,
+ int shader_time_index,
+ bool use_legacy_snorm_formula)
+ : vec4_visitor(compiler, &vs_compile->base, &vs_compile->vp->program.Base,
&vs_compile->key.base, &vs_prog_data->base, prog,
MESA_SHADER_VERTEX,
mem_ctx, false /* no_spills */,
- ST_VS, ST_VS_WRITTEN, ST_VS_RESET),
+ shader_time_index),
vs_compile(vs_compile),
- vs_prog_data(vs_prog_data)
+ vs_prog_data(vs_prog_data),
+ use_legacy_snorm_formula(use_legacy_snorm_formula)
{
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index d03567e33b8..6e9848fb1e9 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -40,108 +40,6 @@
#include "util/ralloc.h"
-static inline void assign_vue_slot(struct brw_vue_map *vue_map,
- int varying)
-{
- /* Make sure this varying hasn't been assigned a slot already */
- assert (vue_map->varying_to_slot[varying] == -1);
-
- vue_map->varying_to_slot[varying] = vue_map->num_slots;
- vue_map->slot_to_varying[vue_map->num_slots++] = varying;
-}
-
-/**
- * Compute the VUE map for vertex shader program.
- */
-void
-brw_compute_vue_map(const struct brw_device_info *devinfo,
- struct brw_vue_map *vue_map,
- GLbitfield64 slots_valid)
-{
- vue_map->slots_valid = slots_valid;
- int i;
-
- /* gl_Layer and gl_ViewportIndex don't get their own varying slots -- they
- * are stored in the first VUE slot (VARYING_SLOT_PSIZ).
- */
- slots_valid &= ~(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
-
- /* Make sure that the values we store in vue_map->varying_to_slot and
- * vue_map->slot_to_varying won't overflow the signed chars that are used
- * to store them. Note that since vue_map->slot_to_varying sometimes holds
- * values equal to BRW_VARYING_SLOT_COUNT, we need to ensure that
- * BRW_VARYING_SLOT_COUNT is <= 127, not 128.
- */
- STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127);
-
- vue_map->num_slots = 0;
- for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
- vue_map->varying_to_slot[i] = -1;
- vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_COUNT;
- }
-
- /* VUE header: format depends on chip generation and whether clipping is
- * enabled.
- */
- if (devinfo->gen < 6) {
- /* There are 8 dwords in VUE header pre-Ironlake:
- * dword 0-3 is indices, point width, clip flags.
- * dword 4-7 is ndc position
- * dword 8-11 is the first vertex data.
- *
- * On Ironlake the VUE header is nominally 20 dwords, but the hardware
- * will accept the same header layout as Gen4 [and should be a bit faster]
- */
- assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
- assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC);
- assign_vue_slot(vue_map, VARYING_SLOT_POS);
- } else {
- /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
- * dword 0-3 of the header is indices, point width, clip flags.
- * dword 4-7 is the 4D space position
- * dword 8-15 of the vertex header is the user clip distance if
- * enabled.
- * dword 8-11 or 16-19 is the first vertex element data we fill.
- */
- assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
- assign_vue_slot(vue_map, VARYING_SLOT_POS);
- if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0))
- assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0);
- if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))
- assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1);
-
- /* front and back colors need to be consecutive so that we can use
- * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
- * two-sided color.
- */
- if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL0))
- assign_vue_slot(vue_map, VARYING_SLOT_COL0);
- if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC0))
- assign_vue_slot(vue_map, VARYING_SLOT_BFC0);
- if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL1))
- assign_vue_slot(vue_map, VARYING_SLOT_COL1);
- if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC1))
- assign_vue_slot(vue_map, VARYING_SLOT_BFC1);
- }
-
- /* The hardware doesn't care about the rest of the vertex outputs, so just
- * assign them contiguously. Don't reassign outputs that already have a
- * slot.
- *
- * We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX,
- * since it's encoded as the clip distances by emit_clip_distances().
- * However, it may be output by transform feedback, and we'd rather not
- * recompute state when TF changes, so we just always include it.
- */
- for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
- if ((slots_valid & BITFIELD64_BIT(i)) &&
- vue_map->varying_to_slot[i] == -1) {
- assign_vue_slot(vue_map, i);
- }
- }
-}
-
-
/**
* Decide which set of clip planes should be used when clipping via
* gl_Position or gl_ClipVertex.
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 6157ae6ffa9..61f9b006a58 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -90,11 +90,13 @@ namespace brw {
class vec4_vs_visitor : public vec4_visitor
{
public:
- vec4_vs_visitor(struct brw_context *brw,
+ vec4_vs_visitor(const struct brw_compiler *compiler,
struct brw_vs_compile *vs_compile,
struct brw_vs_prog_data *vs_prog_data,
struct gl_shader_program *prog,
- void *mem_ctx);
+ void *mem_ctx,
+ int shader_time_index,
+ bool use_legacy_snorm_formula);
protected:
virtual dst_reg *make_reg_for_system_value(ir_variable *ir);
@@ -115,6 +117,8 @@ private:
struct brw_vs_prog_data * const vs_prog_data;
src_reg *vp_temp_regs;
src_reg vp_addr_reg;
+
+ bool use_legacy_snorm_formula;
};
} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index f82a62b4851..b2f91bd412b 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -121,7 +121,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
/* BRW_NEW_VS_PROG_DATA */
const struct brw_stage_prog_data *prog_data = &brw->vs.prog_data->base.base;
- dword_pitch = brw->vs.prog_data->base.simd8;
+ dword_pitch = brw->vs.prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
/* _NEW_PROGRAM_CONSTANTS */
brw_upload_pull_constants(brw, BRW_NEW_VS_CONSTBUF, &vp->program.Base,
@@ -151,7 +151,7 @@ brw_upload_vs_ubo_surfaces(struct brw_context *brw)
return;
/* BRW_NEW_VS_PROG_DATA */
- dword_pitch = brw->vs.prog_data->base.simd8;
+ dword_pitch = brw->vs.prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX],
&brw->vs.base, &brw->vs.prog_data->base.base,
dword_pitch);
diff --git a/src/mesa/drivers/dri/i965/brw_vue_map.c b/src/mesa/drivers/dri/i965/brw_vue_map.c
new file mode 100644
index 00000000000..76875789ba8
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vue_map.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file brw_vue_map.c
+ *
+ * This file computes the "VUE map" for a (non-fragment) shader stage, which
+ * describes the layout of its output varyings. The VUE map is used to match
+ * outputs from one stage with the inputs of the next.
+ *
+ * Largely, varyings can be placed however we like - producers/consumers simply
+ * have to agree on the layout. However, there is also a "VUE Header" that
+ * prescribes a fixed-layout for items that interact with fixed function
+ * hardware, such as the clipper and rasterizer.
+ *
+ * Authors:
+ * Paul Berry <[email protected]>
+ * Chris Forbes <[email protected]>
+ * Eric Anholt <[email protected]>
+ */
+
+
+#include "main/compiler.h"
+#include "brw_context.h"
+
+static inline void
+assign_vue_slot(struct brw_vue_map *vue_map, int varying)
+{
+ /* Make sure this varying hasn't been assigned a slot already */
+ assert (vue_map->varying_to_slot[varying] == -1);
+
+ vue_map->varying_to_slot[varying] = vue_map->num_slots;
+ vue_map->slot_to_varying[vue_map->num_slots++] = varying;
+}
+
+/**
+ * Compute the VUE map for a shader stage.
+ */
+void
+brw_compute_vue_map(const struct brw_device_info *devinfo,
+ struct brw_vue_map *vue_map,
+ GLbitfield64 slots_valid)
+{
+ vue_map->slots_valid = slots_valid;
+ int i;
+
+ /* gl_Layer and gl_ViewportIndex don't get their own varying slots -- they
+ * are stored in the first VUE slot (VARYING_SLOT_PSIZ).
+ */
+ slots_valid &= ~(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+
+ /* Make sure that the values we store in vue_map->varying_to_slot and
+ * vue_map->slot_to_varying won't overflow the signed chars that are used
+ * to store them. Note that since vue_map->slot_to_varying sometimes holds
+ * values equal to BRW_VARYING_SLOT_COUNT, we need to ensure that
+ * BRW_VARYING_SLOT_COUNT is <= 127, not 128.
+ */
+ STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127);
+
+ vue_map->num_slots = 0;
+ for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
+ vue_map->varying_to_slot[i] = -1;
+ vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_COUNT;
+ }
+
+ /* VUE header: format depends on chip generation and whether clipping is
+ * enabled.
+ *
+ * See the Sandybridge PRM, Volume 2 Part 1, section 1.5.1 (page 30),
+ * "Vertex URB Entry (VUE) Formats" which describes the VUE header layout.
+ */
+ if (devinfo->gen < 6) {
+ /* There are 8 dwords in VUE header pre-Ironlake:
+ * dword 0-3 is indices, point width, clip flags.
+ * dword 4-7 is ndc position
+ * dword 8-11 is the first vertex data.
+ *
+ * On Ironlake the VUE header is nominally 20 dwords, but the hardware
+ * will accept the same header layout as Gen4 [and should be a bit faster]
+ */
+ assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
+ assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC);
+ assign_vue_slot(vue_map, VARYING_SLOT_POS);
+ } else {
+ /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
+ * dword 0-3 of the header is indices, point width, clip flags.
+ * dword 4-7 is the 4D space position
+ * dword 8-15 of the vertex header is the user clip distance if
+ * enabled.
+ * dword 8-11 or 16-19 is the first vertex element data we fill.
+ */
+ assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
+ assign_vue_slot(vue_map, VARYING_SLOT_POS);
+ if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0))
+ assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0);
+ if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))
+ assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1);
+
+ /* front and back colors need to be consecutive so that we can use
+ * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
+ * two-sided color.
+ */
+ if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL0))
+ assign_vue_slot(vue_map, VARYING_SLOT_COL0);
+ if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC0))
+ assign_vue_slot(vue_map, VARYING_SLOT_BFC0);
+ if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL1))
+ assign_vue_slot(vue_map, VARYING_SLOT_COL1);
+ if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC1))
+ assign_vue_slot(vue_map, VARYING_SLOT_BFC1);
+ }
+
+ /* The hardware doesn't care about the rest of the vertex outputs, so just
+ * assign them contiguously. Don't reassign outputs that already have a
+ * slot.
+ *
+ * We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX,
+ * since it's encoded as the clip distances by emit_clip_distances().
+ * However, it may be output by transform feedback, and we'd rather not
+ * recompute state when TF changes, so we just always include it.
+ */
+ for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
+ if ((slots_valid & BITFIELD64_BIT(i)) &&
+ vue_map->varying_to_slot[i] == -1) {
+ assign_vue_slot(vue_map, i);
+ }
+ }
+}
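
A hypothetical usage sketch (editorial, not part of the patch) of the function above: a Gen6+ vertex shader writing position, point size and one generic varying ends up with PSIZ in slot 0, POS in slot 1 and VAR0 in slot 2, since the loop at the end packs the remaining outputs in varying-slot order.

#include <assert.h>
#include "brw_context.h"

static void
ex_show_simple_vs_vue_map(const struct brw_device_info *devinfo)
{
   struct brw_vue_map vue_map;

   brw_compute_vue_map(devinfo, &vue_map,
                       VARYING_BIT_POS | VARYING_BIT_PSIZ |
                       BITFIELD64_BIT(VARYING_SLOT_VAR0));

   /* On Gen6+: PSIZ -> 0, POS -> 1, VAR0 -> 2. */
   assert(vue_map.num_slots == 3);
   assert(vue_map.varying_to_slot[VARYING_SLOT_PSIZ] == 0);
   assert(vue_map.varying_to_slot[VARYING_SLOT_POS] == 1);
   assert(vue_map.varying_to_slot[VARYING_SLOT_VAR0] == 2);
}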
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 5496225a6c7..4619ce1080d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -36,6 +36,7 @@
#include "main/formats.h"
#include "main/fbobject.h"
#include "main/samplerobj.h"
+#include "main/framebuffer.h"
#include "program/prog_parameter.h"
#include "program/program.h"
#include "intel_mipmap_tree.h"
@@ -462,7 +463,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
GLuint lookup = 0;
GLuint line_aa;
bool program_uses_dfdy = fp->program.UsesDFdy;
- bool multisample_fbo = ctx->DrawBuffer->Visual.samples > 1;
+ const bool multisample_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
memset(key, 0, sizeof(*key));
@@ -561,7 +562,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
* drawable height in order to invert the Y axis.
*/
if (fp->program.Base.InputsRead & VARYING_BIT_POS) {
- key->drawable_height = ctx->DrawBuffer->Height;
+ key->drawable_height = _mesa_geometric_height(ctx->DrawBuffer);
}
if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
@@ -580,7 +581,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
key->persample_shading =
_mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1;
if (key->persample_shading)
- key->persample_2x = ctx->DrawBuffer->Visual.samples == 2;
+ key->persample_2x = _mesa_geometric_samples(ctx->DrawBuffer) == 2;
key->compute_pos_offset =
_mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 &&
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 160dd2f6c62..72aad96bb6a 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -35,6 +35,7 @@
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"
+#include "main/framebuffer.h"
#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
@@ -738,6 +739,9 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw,
uint32_t *surf_offset)
{
GLuint i;
+ const unsigned int w = _mesa_geometric_width(fb);
+ const unsigned int h = _mesa_geometric_height(fb);
+ const unsigned int s = _mesa_geometric_samples(fb);
/* Update surfaces for drawing buffers */
if (fb->_NumColorDrawBuffers >= 1) {
@@ -748,17 +752,15 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw,
surf_offset[surf_index] =
brw->vtbl.update_renderbuffer_surface(
brw, fb->_ColorDrawBuffers[i],
- fb->MaxNumLayers > 0, i, surf_index);
+ _mesa_geometric_layers(fb) > 0, i, surf_index);
} else {
- brw->vtbl.emit_null_surface_state(
- brw, fb->Width, fb->Height, fb->Visual.samples,
+ brw->vtbl.emit_null_surface_state(brw, w, h, s,
&surf_offset[surf_index]);
}
}
} else {
const uint32_t surf_index = render_target_start;
- brw->vtbl.emit_null_surface_state(
- brw, fb->Width, fb->Height, fb->Visual.samples,
+ brw->vtbl.emit_null_surface_state(brw, w, h, s,
&surf_offset[surf_index]);
}
}
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index aaf90df2b9c..9a29366f0e0 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -31,6 +31,7 @@
#include "brw_util.h"
#include "intel_batchbuffer.h"
#include "main/fbobject.h"
+#include "main/framebuffer.h"
static void
upload_clip_state(struct brw_context *brw)
@@ -145,11 +146,14 @@ upload_clip_state(struct brw_context *brw)
* the viewport, so we can ignore this restriction.
*/
if (brw->gen < 8) {
+ const float fb_width = (float)_mesa_geometric_width(fb);
+ const float fb_height = (float)_mesa_geometric_height(fb);
+
for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
if (ctx->ViewportArray[i].X != 0 ||
ctx->ViewportArray[i].Y != 0 ||
- ctx->ViewportArray[i].Width != (float) fb->Width ||
- ctx->ViewportArray[i].Height != (float) fb->Height) {
+ ctx->ViewportArray[i].Width != fb_width ||
+ ctx->ViewportArray[i].Height != fb_height) {
dw2 &= ~GEN6_CLIP_GB_TEST;
break;
}
@@ -179,7 +183,7 @@ upload_clip_state(struct brw_context *brw)
dw2);
OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
- (fb->MaxNumLayers > 0 ? 0 : GEN6_CLIP_FORCE_ZERO_RTAINDEX) |
+ (_mesa_geometric_layers(fb) > 0 ? 0 : GEN6_CLIP_FORCE_ZERO_RTAINDEX) |
((ctx->Const.MaxViewports - 1) & GEN6_CLIP_MAX_VP_INDEX_MASK));
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
index 28f23c9e4f7..27254ebb727 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
@@ -35,12 +35,13 @@ namespace brw {
class gen6_gs_visitor : public vec4_gs_visitor
{
public:
- gen6_gs_visitor(struct brw_context *brw,
+ gen6_gs_visitor(const struct brw_compiler *comp,
struct brw_gs_compile *c,
struct gl_shader_program *prog,
void *mem_ctx,
- bool no_spills) :
- vec4_gs_visitor(brw, c, prog, mem_ctx, no_spills) {}
+ bool no_spills,
+ int shader_time_index) :
+ vec4_gs_visitor(comp, c, prog, mem_ctx, no_spills, shader_time_index) {}
protected:
virtual void assign_binding_table_offsets();
diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
index ec46479ff75..36734f598fe 100644
--- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
@@ -26,6 +26,7 @@
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_multisample_state.h"
+#include "main/framebuffer.h"
void
gen6_get_sample_position(struct gl_context *ctx,
@@ -34,7 +35,7 @@ gen6_get_sample_position(struct gl_context *ctx,
{
uint8_t bits;
- switch (fb->Visual.samples) {
+ switch (_mesa_geometric_samples(fb)) {
case 1:
result[0] = result[1] = 0.5f;
return;
diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index 6431ed56d81..ba5c944fb3d 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -246,7 +246,7 @@ gen6_queryobj_get_results(struct gl_context *ctx,
* and correctly emitted the number of pixel shader invocations, but,
* whomever forgot to undo the multiply by 4.
*/
- if (brw->gen >= 8 || brw->is_haswell)
+ if (brw->gen == 8 || brw->is_haswell)
query->Base.Result /= 4;
break;
diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
index 0111f152ef6..17b4a7fba96 100644
--- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
@@ -39,6 +39,8 @@ gen6_upload_scissor_state(struct brw_context *brw)
const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
struct gen6_scissor_rect *scissor;
uint32_t scissor_state_offset;
+ const unsigned int fb_width = _mesa_geometric_width(ctx->DrawBuffer);
+ const unsigned int fb_height = _mesa_geometric_height(ctx->DrawBuffer);
scissor = brw_state_batch(brw, AUB_TRACE_SCISSOR_STATE,
sizeof(*scissor) * ctx->Const.MaxViewports, 32,
@@ -56,7 +58,11 @@ gen6_upload_scissor_state(struct brw_context *brw)
for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
int bbox[4];
- _mesa_scissor_bounding_box(ctx, ctx->DrawBuffer, i, bbox);
+ bbox[0] = 0;
+ bbox[1] = fb_width;
+ bbox[2] = 0;
+ bbox[3] = fb_height;
+ _mesa_intersect_scissor_bounding_box(ctx, i, bbox);
if (bbox[0] == bbox[1] || bbox[2] == bbox[3]) {
/* If the scissor was out of bounds and got clamped to 0 width/height
@@ -80,8 +86,8 @@ gen6_upload_scissor_state(struct brw_context *brw)
/* memory: Y=0=top */
scissor[i].xmin = bbox[0];
scissor[i].xmax = bbox[1] - 1;
- scissor[i].ymin = ctx->DrawBuffer->Height - bbox[3];
- scissor[i].ymax = ctx->DrawBuffer->Height - bbox[2] - 1;
+ scissor[i].ymin = fb_height - bbox[3];
+ scissor[i].ymax = fb_height - bbox[2] - 1;
}
}
BEGIN_BATCH(2);
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index e445ce25600..b00517ed81e 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -31,6 +31,7 @@
#include "brw_util.h"
#include "main/macros.h"
#include "main/fbobject.h"
+#include "main/framebuffer.h"
#include "intel_batchbuffer.h"
/**
@@ -273,7 +274,7 @@ upload_sf_state(struct brw_context *brw)
int i;
/* _NEW_BUFFER */
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
- bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
+ const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
float point_size;
@@ -361,31 +362,7 @@ upload_sf_state(struct brw_context *brw)
/* _NEW_LINE */
{
- /* OpenGL dictates that line width should be rounded to the nearest
- * integer
- */
- float line_width =
- roundf(CLAMP(ctx->Line.Width, 0.0, ctx->Const.MaxLineWidth));
- uint32_t line_width_u3_7 = U_FIXED(line_width, 7);
-
- /* Line width of 0 is not allowed when MSAA enabled */
- if (ctx->Multisample._Enabled) {
- if (line_width_u3_7 == 0)
- line_width_u3_7 = 1;
- } else if (ctx->Line.SmoothFlag && ctx->Line.Width < 1.5) {
- /* For 1 pixel line thickness or less, the general
- * anti-aliasing algorithm gives up, and a garbage line is
- * generated. Setting a Line Width of 0.0 specifies the
- * rasterization of the "thinnest" (one-pixel-wide),
- * non-antialiased lines.
- *
- * Lines rendered with zero Line Width are rasterized using
- * Grid Intersection Quantization rules as specified by
- * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line
- * Rasterization.
- */
- line_width_u3_7 = 0;
- }
+ uint32_t line_width_u3_7 = brw_get_line_width(brw);
dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
}
if (ctx->Line.SmoothFlag) {
diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
index 2fb0182c56e..7c8d8849f4e 100644
--- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
@@ -30,6 +30,7 @@
#include "brw_defines.h"
#include "intel_batchbuffer.h"
#include "main/fbobject.h"
+#include "main/framebuffer.h"
#include "main/viewport.h"
/* The clip VP defines the guardband region where expensive clipping is skipped
@@ -93,10 +94,10 @@ gen6_upload_sf_vp(struct brw_context *brw)
/* _NEW_BUFFERS */
if (render_to_fbo) {
y_scale = 1.0;
- y_bias = 0;
+ y_bias = 0.0;
} else {
y_scale = -1.0;
- y_bias = ctx->DrawBuffer->Height;
+ y_bias = (float)_mesa_geometric_height(ctx->DrawBuffer);
}
for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 7081eb73428..d1748ba7457 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -33,6 +33,7 @@
#include "program/program.h"
#include "program/prog_parameter.h"
#include "program/prog_statevars.h"
+#include "main/framebuffer.h"
#include "intel_batchbuffer.h"
static void
@@ -284,7 +285,7 @@ upload_wm_state(struct brw_context *brw)
const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
/* _NEW_BUFFERS */
- const bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
+ const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
/* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
* should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index e1c4f8b5d14..8d6d3fe1d34 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -112,7 +112,7 @@ upload_gs_state(struct brw_context *brw)
GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) |
((brw->gs.prog_data->invocations - 1) <<
GEN7_GS_INSTANCE_CONTROL_SHIFT) |
- brw->gs.prog_data->dispatch_mode |
+ SET_FIELD(prog_data->dispatch_mode, GEN7_GS_DISPATCH_MODE) |
GEN6_GS_STATISTICS_ENABLE |
(brw->gs.prog_data->include_primitive_id ?
GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) |
diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c
index 58e33370c57..4fa46a8eb97 100644
--- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
@@ -27,6 +27,7 @@
#include "brw_util.h"
#include "main/macros.h"
#include "main/fbobject.h"
+#include "main/framebuffer.h"
#include "intel_batchbuffer.h"
static void
@@ -109,7 +110,7 @@ upload_sf_state(struct brw_context *brw)
float point_size;
/* _NEW_BUFFERS */
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
- bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
+ const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
dw1 = GEN6_SF_STATISTICS_ENABLE;
@@ -192,30 +193,7 @@ upload_sf_state(struct brw_context *brw)
/* _NEW_LINE */
{
- /* OpenGL dictates that line width should be rounded to the nearest
- * integer
- */
- float line_width =
- roundf(CLAMP(ctx->Line.Width, 0.0, ctx->Const.MaxLineWidth));
- uint32_t line_width_u3_7 = U_FIXED(line_width, 7);
- /* Line width of 0 is not allowed when MSAA enabled */
- if (ctx->Multisample._Enabled) {
- if (line_width_u3_7 == 0)
- line_width_u3_7 = 1;
- } else if (ctx->Line.SmoothFlag && ctx->Line.Width < 1.5) {
- /* For 1 pixel line thickness or less, the general
- * anti-aliasing algorithm gives up, and a garbage line is
- * generated. Setting a Line Width of 0.0 specifies the
- * rasterization of the "thinnest" (one-pixel-wide),
- * non-antialiased lines.
- *
- * Lines rendered with zero Line Width are rasterized using
- * Grid Intersection Quantization rules as specified by
- * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line
- * Rasterization.
- */
- line_width_u3_7 = 0;
- }
+ uint32_t line_width_u3_7 = brw_get_line_width(brw);
dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
}
if (ctx->Line.SmoothFlag) {
diff --git a/src/mesa/drivers/dri/i965/gen7_viewport_state.c b/src/mesa/drivers/dri/i965/gen7_viewport_state.c
index eb596845b72..b655205ec35 100644
--- a/src/mesa/drivers/dri/i965/gen7_viewport_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_viewport_state.c
@@ -26,6 +26,7 @@
#include "brw_defines.h"
#include "intel_batchbuffer.h"
#include "main/fbobject.h"
+#include "main/framebuffer.h"
#include "main/viewport.h"
static void
@@ -45,10 +46,10 @@ gen7_upload_sf_clip_viewport(struct brw_context *brw)
/* _NEW_BUFFERS */
if (render_to_fbo) {
y_scale = 1.0;
- y_bias = 0;
+ y_bias = 0.0;
} else {
y_scale = -1.0;
- y_bias = ctx->DrawBuffer->Height;
+ y_bias = (float)_mesa_geometric_height(ctx->DrawBuffer);
}
for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 278b3ec6d21..4b17d06fa83 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -43,18 +43,52 @@ gen7_upload_constant_state(struct brw_context *brw,
int dwords = brw->gen >= 8 ? 11 : 7;
BEGIN_BATCH(dwords);
OUT_BATCH(opcode << 16 | (dwords - 2));
- OUT_BATCH(active ? stage_state->push_const_size : 0);
- OUT_BATCH(0);
+
+ /* Workaround for SKL+ (we use option #2 until we have a need for more
+ * constant buffers). This comes from the documentation for 3DSTATE_CONSTANT_*
+ *
+ * The driver must ensure the following case does not occur without a flush
+ * to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length equal to
+ * zero committed followed by a 3DSTATE_CONSTANT_* with buffer 0 read length
+ * not equal to zero committed. Possible ways to avoid this condition
+ * include:
+ * 1. always force buffer 3 to have a non zero read length
+ * 2. always force buffer 0 to a zero read length
+ */
+ if (brw->gen >= 9 && active) {
+ OUT_BATCH(0);
+ OUT_BATCH(stage_state->push_const_size);
+ } else {
+ OUT_BATCH(active ? stage_state->push_const_size : 0);
+ OUT_BATCH(0);
+ }
/* Pointer to the constant buffer. Covered by the set of state flags
* from gen6_prepare_wm_constants
*/
- OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- if (brw->gen >= 8) {
+ if (brw->gen >= 9 && active) {
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ /* XXX: When using buffers other than 0, you need to specify the
+ * graphics virtual address regardless of INSPM/debug bits
+ */
+ OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER, 0,
+ stage_state->push_const_offset);
OUT_BATCH(0);
OUT_BATCH(0);
+ } else if (brw->gen >= 8) {
+ OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ } else {
+ OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
+ OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
}
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index b9182758852..ea11ae845e3 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -30,6 +30,7 @@
#include "program/program.h"
#include "program/prog_parameter.h"
#include "program/prog_statevars.h"
+#include "main/framebuffer.h"
#include "intel_batchbuffer.h"
static void
@@ -45,7 +46,7 @@ upload_wm_state(struct brw_context *brw)
uint32_t dw1, dw2;
/* _NEW_BUFFERS */
- bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
+ const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
dw1 = dw2 = 0;
dw1 |= GEN7_WM_STATISTICS_ENABLE;
@@ -76,6 +77,10 @@ upload_wm_state(struct brw_context *brw)
dw1 |= GEN7_WM_KILL_ENABLE;
}
+ if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx)) {
+ dw1 |= GEN7_WM_DISPATCH_ENABLE;
+ }
+
/* _NEW_BUFFERS | _NEW_COLOR */
if (brw_color_buffer_write_enabled(brw) || writes_depth ||
dw1 & GEN7_WM_KILL_ENABLE) {
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index b502650f991..12ac97a5d14 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -417,6 +417,16 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
uint32_t surface_width = ALIGN(mt->logical_width0, level == 0 ? 8 : 1);
uint32_t surface_height = ALIGN(mt->logical_height0, level == 0 ? 4 : 1);
+ /* From the documentation for 3DSTATE_WM_HZ_OP: "3DSTATE_MULTISAMPLE packet
+ * must be used prior to this packet to change the Number of Multisamples.
+ * This packet must not be used to change Number of Multisamples in a
+ * rendering sequence."
+ */
+ if (brw->num_samples != mt->num_samples) {
+ gen8_emit_3dstate_multisample(brw, mt->num_samples);
+ brw->NewGLState |= _NEW_MULTISAMPLE;
+ }
+
/* The basic algorithm is:
* - If needed, emit 3DSTATE_{DEPTH,HIER_DEPTH,STENCIL}_BUFFER and
* 3DSTATE_CLEAR_PARAMS packets to set up the relevant buffers.
diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c
index 46b97131e20..26a02d3b045 100644
--- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
@@ -48,8 +48,7 @@ gen8_upload_gs_state(struct brw_context *brw)
OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2));
OUT_BATCH(stage_state->prog_offset);
OUT_BATCH(0);
- OUT_BATCH(GEN6_GS_VECTOR_MASK_ENABLE |
- brw->geometry_program->VerticesIn |
+ OUT_BATCH(brw->geometry_program->VerticesIn |
((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_GS_SAMPLER_COUNT_SHIFT) |
((prog_data->base.binding_table.size_bytes / 4) <<
@@ -59,10 +58,6 @@ gen8_upload_gs_state(struct brw_context *brw)
OUT_RELOC64(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
ffs(brw->gs.prog_data->base.base.total_scratch) - 11);
- WARN_ONCE(true,
- "May need to implement a temporary workaround: GS Number of "
- "URB Entries must be less than or equal to the GS Maximum "
- "Number of Threads.\n");
} else {
OUT_BATCH(0);
OUT_BATCH(0);
@@ -81,7 +76,8 @@ gen8_upload_gs_state(struct brw_context *brw)
uint32_t dw7 = (brw->gs.prog_data->control_data_header_size_hwords <<
GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) |
- brw->gs.prog_data->dispatch_mode |
+ SET_FIELD(prog_data->dispatch_mode,
+ GEN7_GS_DISPATCH_MODE) |
((brw->gs.prog_data->invocations - 1) <<
GEN7_GS_INSTANCE_CONTROL_SHIFT) |
GEN6_GS_STATISTICS_ENABLE |
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index 85ad3b6c551..a88f109c691 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -58,6 +58,9 @@ gen8_upload_ps_extra(struct brw_context *brw,
if (prog_data->uses_omask)
dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET;
+ if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx))
+ dw1 |= GEN8_PSX_SHADER_HAS_UAV;
+
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
OUT_BATCH(dw1);
@@ -72,7 +75,7 @@ upload_ps_extra(struct brw_context *brw)
brw_fragment_program_const(brw->fragment_program);
/* BRW_NEW_FS_PROG_DATA */
const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
- /* BRW_NEW_NUM_SAMPLES | _NEW_MULTISAMPLE */
+ /* BRW_NEW_NUM_SAMPLES */
const bool multisampled_fbo = brw->num_samples > 1;
gen8_upload_ps_extra(brw, &fp->program, prog_data, multisampled_fbo);
@@ -80,7 +83,7 @@ upload_ps_extra(struct brw_context *brw)
const struct brw_tracked_state gen8_ps_extra = {
.dirty = {
- .mesa = _NEW_MULTISAMPLE,
+ .mesa = 0,
.brw = BRW_NEW_CONTEXT |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_FS_PROG_DATA |
diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c
index 52a21b6a8e8..c2b585d0001 100644
--- a/src/mesa/drivers/dri/i965/gen8_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c
@@ -154,14 +154,7 @@ upload_sf(struct brw_context *brw)
dw1 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
/* _NEW_LINE */
- /* OpenGL dictates that line width should be rounded to the nearest
- * integer
- */
- float line_width =
- roundf(CLAMP(ctx->Line.Width, 0.0, ctx->Const.MaxLineWidth));
- uint32_t line_width_u3_7 = U_FIXED(line_width, 7);
- if (line_width_u3_7 == 0)
- line_width_u3_7 = 1;
+ uint32_t line_width_u3_7 = brw_get_line_width(brw);
if (brw->gen >= 9 || brw->is_cherryview) {
dw1 |= line_width_u3_7 << GEN9_SF_LINE_WIDTH_SHIFT;
} else {
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index d0c2d80b17b..b2d1a579815 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -57,6 +57,19 @@ swizzle_to_scs(unsigned swizzle)
}
static uint32_t
+surface_tiling_resource_mode(uint32_t tr_mode)
+{
+ switch (tr_mode) {
+ case INTEL_MIPTREE_TRMODE_YF:
+ return GEN9_SURFACE_TRMODE_TILEYF;
+ case INTEL_MIPTREE_TRMODE_YS:
+ return GEN9_SURFACE_TRMODE_TILEYS;
+ default:
+ return GEN9_SURFACE_TRMODE_NONE;
+ }
+}
+
+static uint32_t
surface_tiling_mode(uint32_t tiling)
{
switch (tiling) {
@@ -70,8 +83,18 @@ surface_tiling_mode(uint32_t tiling)
}
static unsigned
-vertical_alignment(const struct intel_mipmap_tree *mt)
+vertical_alignment(const struct brw_context *brw,
+ const struct intel_mipmap_tree *mt,
+ uint32_t surf_type)
{
+ /* On Gen9+ vertical alignment is ignored for 1D surfaces and when
+ * tr_mode is not TRMODE_NONE.
+ */
+ if (brw->gen > 8 &&
+ (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE ||
+ surf_type == BRW_SURFACE_1D))
+ return 0;
+
switch (mt->align_h) {
case 4:
return GEN8_SURFACE_VALIGN_4;
@@ -85,8 +108,18 @@ vertical_alignment(const struct intel_mipmap_tree *mt)
}
static unsigned
-horizontal_alignment(const struct intel_mipmap_tree *mt)
+horizontal_alignment(const struct brw_context *brw,
+ const struct intel_mipmap_tree *mt,
+ uint32_t surf_type)
{
+ /* On Gen9+ horizontal alignment is ignored when tr_mode is not
+ * TRMODE_NONE.
+ */
+ if (brw->gen > 8 &&
+ (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE ||
+ gen9_use_linear_1d_layout(brw, mt)))
+ return 0;
+
switch (mt->align_w) {
case 4:
return GEN8_SURFACE_HALIGN_4;
@@ -100,11 +133,11 @@ horizontal_alignment(const struct intel_mipmap_tree *mt)
}
static uint32_t *
-allocate_surface_state(struct brw_context *brw, uint32_t *out_offset)
+allocate_surface_state(struct brw_context *brw, uint32_t *out_offset, int index)
{
int dwords = brw->gen >= 9 ? 16 : 13;
- uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
- dwords * 4, 64, out_offset);
+ uint32_t *surf = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
+ dwords * 4, 64, index, out_offset);
memset(surf, 0, dwords * 4);
return surf;
}
@@ -120,7 +153,7 @@ gen8_emit_buffer_surface_state(struct brw_context *brw,
bool rw)
{
const unsigned mocs = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
- uint32_t *surf = allocate_surface_state(brw, out_offset);
+ uint32_t *surf = allocate_surface_state(brw, out_offset, -1);
surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
surface_format << BRW_SURFACE_FORMAT_SHIFT |
@@ -164,7 +197,9 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
struct intel_mipmap_tree *aux_mt = NULL;
uint32_t aux_mode = 0;
uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
+ int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
unsigned tiling_mode, pitch;
+ const unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode);
if (mt->format == MESA_FORMAT_S_UINT8) {
tiling_mode = GEN8_SURFACE_TILING_W;
@@ -177,18 +212,29 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
if (mt->mcs_mt) {
aux_mt = mt->mcs_mt;
aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
+
+ /*
+ * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
+ * "When MCS is enabled for non-MSRT, HALIGN_16 must be used"
+ *
+ * From the hardware spec for GEN9:
+ * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
+ * 16 must be used."
+ */
+ assert(brw->gen < 9 || mt->align_w == 16);
+ assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16);
}
- uint32_t *surf = allocate_surface_state(brw, surf_offset);
+ const uint32_t surf_type = translate_tex_target(target);
+ uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index);
- surf[0] = translate_tex_target(target) << BRW_SURFACE_TYPE_SHIFT |
+ surf[0] = SET_FIELD(surf_type, BRW_SURFACE_TYPE) |
format << BRW_SURFACE_FORMAT_SHIFT |
- vertical_alignment(mt) |
- horizontal_alignment(mt) |
+ vertical_alignment(brw, mt, surf_type) |
+ horizontal_alignment(brw, mt, surf_type) |
tiling_mode;
- if (target == GL_TEXTURE_CUBE_MAP ||
- target == GL_TEXTURE_CUBE_MAP_ARRAY) {
+ if (surf_type == BRW_SURFACE_CUBE) {
surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
}
@@ -209,6 +255,12 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
surf[5] = SET_FIELD(min_level - mt->first_level, GEN7_SURFACE_MIN_LOD) |
(max_level - min_level - 1); /* mip count */
+ if (brw->gen >= 9) {
+ surf[5] |= SET_FIELD(tr_mode, GEN9_SURFACE_TRMODE);
+ /* Disable Mip Tail by setting a large value. */
+ surf[5] |= SET_FIELD(15, GEN9_SURFACE_MIP_TAIL_START_LOD);
+ }
+
if (aux_mt) {
surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
@@ -310,7 +362,7 @@ gen8_emit_null_surface_state(struct brw_context *brw,
unsigned samples,
uint32_t *out_offset)
{
- uint32_t *surf = allocate_surface_state(brw, out_offset);
+ uint32_t *surf = allocate_surface_state(brw, out_offset, -1);
surf[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
@@ -339,6 +391,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
unsigned height = mt->logical_height0;
unsigned pitch = mt->pitch;
uint32_t tiling = mt->tiling;
+ unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode);
uint32_t format = 0;
uint32_t surf_type;
uint32_t offset;
@@ -390,15 +443,26 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
if (mt->mcs_mt) {
aux_mt = mt->mcs_mt;
aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
+
+ /*
+ * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
+ * "When MCS is enabled for non-MSRT, HALIGN_16 must be used"
+ *
+ * From the hardware spec for GEN9:
+ * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
+ * 16 must be used."
+ */
+ assert(brw->gen < 9 || mt->align_w == 16);
+ assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16);
}
- uint32_t *surf = allocate_surface_state(brw, &offset);
+ uint32_t *surf = allocate_surface_state(brw, &offset, surf_index);
surf[0] = (surf_type << BRW_SURFACE_TYPE_SHIFT) |
(is_array ? GEN7_SURFACE_IS_ARRAY : 0) |
(format << BRW_SURFACE_FORMAT_SHIFT) |
- vertical_alignment(mt) |
- horizontal_alignment(mt) |
+ vertical_alignment(brw, mt, surf_type) |
+ horizontal_alignment(brw, mt, surf_type) |
surface_tiling_mode(tiling);
surf[1] = SET_FIELD(mocs, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;
@@ -417,6 +481,12 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
surf[5] = irb->mt_level - irb->mt->first_level;
+ if (brw->gen >= 9) {
+ surf[5] |= SET_FIELD(tr_mode, GEN9_SURFACE_TRMODE);
+ /* Disable Mip Tail by setting a large value. */
+ surf[5] |= SET_FIELD(15, GEN9_SURFACE_MIP_TAIL_START_LOD);
+ }
+
if (aux_mt) {
surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
diff --git a/src/mesa/drivers/dri/i965/gen8_viewport_state.c b/src/mesa/drivers/dri/i965/gen8_viewport_state.c
index 322e4663b99..2d8eeb1f10f 100644
--- a/src/mesa/drivers/dri/i965/gen8_viewport_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_viewport_state.c
@@ -26,6 +26,7 @@
#include "brw_defines.h"
#include "intel_batchbuffer.h"
#include "main/fbobject.h"
+#include "main/framebuffer.h"
#include "main/viewport.h"
static void
@@ -33,6 +34,7 @@ gen8_upload_sf_clip_viewport(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
float y_scale, y_bias;
+ const float fb_height = (float)_mesa_geometric_height(ctx->DrawBuffer);
const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
float *vp = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE,
@@ -47,7 +49,7 @@ gen8_upload_sf_clip_viewport(struct brw_context *brw)
y_bias = 0;
} else {
y_scale = -1.0;
- y_bias = ctx->DrawBuffer->Height;
+ y_bias = fb_height;
}
for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
@@ -116,8 +118,8 @@ gen8_upload_sf_clip_viewport(struct brw_context *brw)
} else {
vp[12] = ctx->ViewportArray[i].X;
vp[13] = viewport_Xmax - 1;
- vp[14] = ctx->DrawBuffer->Height - viewport_Ymax;
- vp[15] = ctx->DrawBuffer->Height - ctx->ViewportArray[i].Y - 1;
+ vp[14] = fb_height - viewport_Ymax;
+ vp[15] = fb_height - ctx->ViewportArray[i].Y - 1;
}
vp += 16;
diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c
index f92af55e37f..28f5adddf14 100644
--- a/src/mesa/drivers/dri/i965/gen8_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c
@@ -39,6 +39,9 @@ upload_vs_state(struct brw_context *brw)
/* BRW_NEW_VS_PROG_DATA */
const struct brw_vue_prog_data *prog_data = &brw->vs.prog_data->base;
+ assert(prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ||
+ prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT);
+
if (prog_data->base.use_alt_mode)
floating_point_mode = GEN6_VS_FLOATING_POINT_MODE_ALT;
@@ -66,7 +69,8 @@ upload_vs_state(struct brw_context *brw)
(prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
- uint32_t simd8_enable = prog_data->simd8 ? GEN8_VS_SIMD8_ENABLE : 0;
+ uint32_t simd8_enable = prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ?
+ GEN8_VS_SIMD8_ENABLE : 0;
OUT_BATCH(((brw->max_vs_threads - 1) << HSW_VS_MAX_THREADS_SHIFT) |
GEN6_VS_STATISTICS_ENABLE |
simd8_enable |
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index e522e4e9c1d..ed659ed625e 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -743,27 +743,54 @@ intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
brw_render_cache_set_clear(brw);
}
-void
-brw_load_register_mem(struct brw_context *brw,
- uint32_t reg,
- drm_intel_bo *bo,
- uint32_t read_domains, uint32_t write_domain,
- uint32_t offset)
+static void
+load_sized_register_mem(struct brw_context *brw,
+ uint32_t reg,
+ drm_intel_bo *bo,
+ uint32_t read_domains, uint32_t write_domain,
+ uint32_t offset,
+ int size)
{
+ int i;
+
/* MI_LOAD_REGISTER_MEM only exists on Gen7+. */
assert(brw->gen >= 7);
if (brw->gen >= 8) {
- BEGIN_BATCH(4);
- OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (4 - 2));
- OUT_BATCH(reg);
- OUT_RELOC64(bo, read_domains, write_domain, offset);
+ BEGIN_BATCH(4 * size);
+ for (i = 0; i < size; i++) {
+ OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (4 - 2));
+ OUT_BATCH(reg + i * 4);
+ OUT_RELOC64(bo, read_domains, write_domain, offset + i * 4);
+ }
ADVANCE_BATCH();
} else {
- BEGIN_BATCH(3);
- OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
- OUT_BATCH(reg);
- OUT_RELOC(bo, read_domains, write_domain, offset);
+ BEGIN_BATCH(3 * size);
+ for (i = 0; i < size; i++) {
+ OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
+ OUT_BATCH(reg + i * 4);
+ OUT_RELOC(bo, read_domains, write_domain, offset + i * 4);
+ }
ADVANCE_BATCH();
}
}
+
+void
+brw_load_register_mem(struct brw_context *brw,
+ uint32_t reg,
+ drm_intel_bo *bo,
+ uint32_t read_domains, uint32_t write_domain,
+ uint32_t offset)
+{
+ load_sized_register_mem(brw, reg, bo, read_domains, write_domain, offset, 1);
+}
+
+void
+brw_load_register_mem64(struct brw_context *brw,
+ uint32_t reg,
+ drm_intel_bo *bo,
+ uint32_t read_domains, uint32_t write_domain,
+ uint32_t offset)
+{
+ load_sized_register_mem(brw, reg, bo, read_domains, write_domain, offset, 2);
+}
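Editor's note: a hedged usage sketch of the new 64-bit variant (hypothetical caller; MI_PREDICATE_SRC0 is defined in intel_reg.h later in this patch, and query_bo stands for any buffer object into which the GPU has written a 64-bit value):

    /* Load a 64-bit result from query_bo into MI_PREDICATE_SRC0; this
     * expands to two back-to-back MI_LOAD_REGISTER_MEM commands.
     */
    brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query_bo,
                            I915_GEM_DOMAIN_INSTRUCTION,
                            0 /* no write domain */,
                            0 /* byte offset of the value in the BO */);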
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 7680a402975..d3ab769356c 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -77,13 +77,10 @@ br13_for_cpp(int cpp)
switch (cpp) {
case 4:
return BR13_8888;
- break;
case 2:
return BR13_565;
- break;
case 1:
return BR13_8;
- break;
default:
unreachable("not reached");
}
@@ -130,6 +127,40 @@ set_blitter_tiling(struct brw_context *brw,
ADVANCE_BATCH(); \
} while (0)
+static int
+blt_pitch(struct intel_mipmap_tree *mt)
+{
+ int pitch = mt->pitch;
+ if (mt->tiling)
+ pitch /= 4;
+ return pitch;
+}
+
+bool
+intel_miptree_blit_compatible_formats(mesa_format src, mesa_format dst)
+{
+ /* The BLT doesn't handle sRGB conversion */
+ assert(src == _mesa_get_srgb_format_linear(src));
+ assert(dst == _mesa_get_srgb_format_linear(dst));
+
+ /* No swizzle or format conversions possible, except... */
+ if (src == dst)
+ return true;
+
+ /* ...we can either discard the alpha channel when going from A->X,
+ * or we can fill the alpha channel with 0xff when going from X->A
+ */
+ if (src == MESA_FORMAT_B8G8R8A8_UNORM || src == MESA_FORMAT_B8G8R8X8_UNORM)
+ return (dst == MESA_FORMAT_B8G8R8A8_UNORM ||
+ dst == MESA_FORMAT_B8G8R8X8_UNORM);
+
+ if (src == MESA_FORMAT_R8G8B8A8_UNORM || src == MESA_FORMAT_R8G8B8X8_UNORM)
+ return (dst == MESA_FORMAT_R8G8B8A8_UNORM ||
+ dst == MESA_FORMAT_R8G8B8X8_UNORM);
+
+ return false;
+}
+
/**
* Implements a rectangular block transfer (blit) of pixels between two
* miptrees.
@@ -172,11 +203,7 @@ intel_miptree_blit(struct brw_context *brw,
* the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A
* channel to 1.0 at the end.
*/
- if (src_format != dst_format &&
- ((src_format != MESA_FORMAT_B8G8R8A8_UNORM &&
- src_format != MESA_FORMAT_B8G8R8X8_UNORM) ||
- (dst_format != MESA_FORMAT_B8G8R8A8_UNORM &&
- dst_format != MESA_FORMAT_B8G8R8X8_UNORM))) {
+ if (!intel_miptree_blit_compatible_formats(src_format, dst_format)) {
perf_debug("%s: Can't use hardware blitter from %s to %s, "
"falling back.\n", __func__,
_mesa_get_format_name(src_format),
@@ -197,14 +224,14 @@ intel_miptree_blit(struct brw_context *brw,
*
* Furthermore, intelEmitCopyBlit (which is called below) uses a signed
* 16-bit integer to represent buffer pitch, so it can only handle buffer
- * pitches < 32k.
+ * pitches < 32k. However, the pitch is measured in bytes for linear buffers
+ * and dwords for tiled buffers.
*
* As a result of these two limitations, we can only use the blitter to do
- * this copy when the miptree's pitch is less than 32k.
+ * this copy when the miptree's pitch is less than 32k linear or 128k tiled.
*/
- if (src_mt->pitch >= 32768 ||
- dst_mt->pitch >= 32768) {
- perf_debug("Falling back due to >=32k pitch\n");
+ if (blt_pitch(src_mt) >= 32768 || blt_pitch(dst_mt) >= 32768) {
+ perf_debug("Falling back due to >= 32k/128k pitch\n");
return false;
}
@@ -261,8 +288,9 @@ intel_miptree_blit(struct brw_context *brw,
return false;
}
- if (src_mt->format == MESA_FORMAT_B8G8R8X8_UNORM &&
- dst_mt->format == MESA_FORMAT_B8G8R8A8_UNORM) {
+ /* XXX This could be done in a single pass using XY_FULL_MONO_PATTERN_BLT */
+ if (_mesa_get_format_bits(src_format, GL_ALPHA_BITS) == 0 &&
+ _mesa_get_format_bits(dst_format, GL_ALPHA_BITS) > 0) {
intel_miptree_set_alpha_to_one(brw, dst_mt,
dst_x, dst_y,
width, height);
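Editor's note: a worked example of the relaxed pitch limit (illustrative numbers, not from the patch): a Y-tiled miptree with a 65536-byte pitch is programmed in dwords, so blt_pitch() returns 65536 / 4 = 16384, which fits the blitter's signed 16-bit pitch field; the same 65536-byte pitch on a linear miptree stays in bytes, exceeds 32767, and still takes the fallback path.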
diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h
index f563939fdd9..2287c379c4e 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.h
+++ b/src/mesa/drivers/dri/i965/intel_blit.h
@@ -46,6 +46,8 @@ intelEmitCopyBlit(struct brw_context *brw,
GLshort w, GLshort h,
GLenum logicop );
+bool intel_miptree_blit_compatible_formats(mesa_format src, mesa_format dst);
+
bool intel_miptree_blit(struct brw_context *brw,
struct intel_mipmap_tree *src_mt,
int src_level, int src_slice,
diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c
index 33a0348486d..75cf7854eff 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.c
+++ b/src/mesa/drivers/dri/i965/intel_debug.c
@@ -88,25 +88,22 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage)
}
void
-brw_process_intel_debug_variable(struct brw_context *brw)
+brw_process_intel_debug_variable(struct intel_screen *screen)
{
uint64_t intel_debug = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
(void) p_atomic_cmpxchg(&INTEL_DEBUG, 0, intel_debug);
if (INTEL_DEBUG & DEBUG_BUFMGR)
- dri_bufmgr_set_debug(brw->bufmgr, true);
+ dri_bufmgr_set_debug(screen->bufmgr, true);
- if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && brw->gen < 7) {
+ if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && screen->devinfo->gen < 7) {
fprintf(stderr,
"shader_time debugging requires gen7 (Ivybridge) or better.\n");
INTEL_DEBUG &= ~DEBUG_SHADER_TIME;
}
- if (INTEL_DEBUG & DEBUG_PERF)
- brw->perf_debug = true;
-
if (INTEL_DEBUG & DEBUG_AUB)
- drm_intel_bufmgr_gem_set_aub_dump(brw->bufmgr, true);
+ drm_intel_bufmgr_gem_set_aub_dump(screen->bufmgr, true);
}
/**
diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h
index f754be20b1d..4689492e1fd 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.h
+++ b/src/mesa/drivers/dri/i965/intel_debug.h
@@ -114,8 +114,8 @@ extern uint64_t INTEL_DEBUG;
extern uint64_t intel_debug_flag_for_shader_stage(gl_shader_stage stage);
-struct brw_context;
+struct intel_screen;
-extern void brw_process_intel_debug_variable(struct brw_context *brw);
+extern void brw_process_intel_debug_variable(struct intel_screen *);
extern bool brw_env_var_as_boolean(const char *var_name, bool default_value);
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index d6da34c7065..c99677c7197 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -323,9 +323,12 @@ intelInitExtensions(struct gl_context *ctx)
}
}
+ brw->predicate.supported = false;
+
if (brw->gen >= 7) {
ctx->Extensions.ARB_conservative_depth = true;
ctx->Extensions.ARB_derivative_control = true;
+ ctx->Extensions.ARB_framebuffer_no_attachments = true;
ctx->Extensions.ARB_gpu_shader5 = true;
ctx->Extensions.ARB_shader_atomic_counters = true;
ctx->Extensions.ARB_texture_compression_bptc = true;
@@ -337,6 +340,9 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_transform_feedback2 = true;
ctx->Extensions.ARB_transform_feedback3 = true;
ctx->Extensions.ARB_transform_feedback_instanced = true;
+
+ if (brw->intelScreen->cmd_parser_version >= 2)
+ brw->predicate.supported = true;
}
/* Only enable this in core profile because other parts of Mesa behave
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index aebed723f75..1b3a72f3ec2 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -390,7 +390,7 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx,
image->height,
1,
image->pitch,
- true /*disable_aux_buffers*/);
+ MIPTREE_LAYOUT_DISABLE_AUX);
if (!irb->mt)
return;
@@ -1027,10 +1027,9 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
intel_image->base.Base.Level,
intel_image->base.Base.Level,
width, height, depth,
- true,
irb->mt->num_samples,
INTEL_MIPTREE_TILING_ANY,
- false);
+ MIPTREE_LAYOUT_ACCELERATED_UPLOAD);
if (intel_miptree_wants_hiz_buffer(brw, new_mt)) {
intel_miptree_alloc_hiz(brw, new_mt);
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 24a5c3dc666..6aa969a4930 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -158,15 +158,32 @@ intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
}
}
+bool
+intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
+{
+ /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
+ * Target(s)", beneath the "Fast Color Clear" bullet (p326):
+ *
+ * - Support is limited to tiled render targets.
+ *
+ * Gen9 changes the restriction to Y-tile only.
+ */
+ if (brw->gen >= 9)
+ return tiling == I915_TILING_Y;
+ else if (brw->gen >= 7)
+ return tiling != I915_TILING_NONE;
+ else
+ return false;
+}
/**
* For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
- * can be used.
+ * can be used. This doesn't (and should not) inspect any of the properties of
+ * the miptree's BO.
*
* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
* beneath the "Fast Color Clear" bullet (p326):
*
- * - Support is limited to tiled render targets.
* - Support is for non-mip-mapped and non-array surface types only.
*
* And then later, on p327:
@@ -175,8 +192,8 @@ intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
* 64bpp, and 128bpp.
*/
bool
-intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
- struct intel_mipmap_tree *mt)
+intel_miptree_is_fast_clear_capable(struct brw_context *brw,
+ struct intel_mipmap_tree *mt)
{
/* MCS support does not exist prior to Gen7 */
if (brw->gen < 7)
@@ -193,15 +210,25 @@ intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
return false;
}
- if (mt->tiling != I915_TILING_X &&
- mt->tiling != I915_TILING_Y)
- return false;
if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
return false;
- if (mt->first_level != 0 || mt->last_level != 0)
+ if (mt->first_level != 0 || mt->last_level != 0) {
+ if (brw->gen >= 8) {
+ perf_debug("Multi-LOD fast clear - giving up (%dx%dx%d).\n",
+ mt->logical_width0, mt->logical_height0, mt->last_level);
+ }
+
return false;
- if (mt->physical_depth0 != 1)
+ }
+ if (mt->physical_depth0 != 1) {
+ if (brw->gen >= 8) {
+ perf_debug("Layered fast clear - giving up. (%dx%d%d)\n",
+ mt->logical_width0, mt->logical_height0,
+ mt->physical_depth0);
+ }
+
return false;
+ }
/* There's no point in using an MCS buffer if the surface isn't in a
* renderable format.
@@ -244,10 +271,9 @@ intel_miptree_create_layout(struct brw_context *brw,
GLuint width0,
GLuint height0,
GLuint depth0,
- bool for_bo,
GLuint num_samples,
- bool force_all_slices_at_each_lod,
- bool disable_aux_buffers)
+ enum intel_miptree_tiling_mode requested,
+ uint32_t layout_flags)
{
struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
if (!mt)
@@ -286,7 +312,7 @@ intel_miptree_create_layout(struct brw_context *brw,
mt->logical_height0 = height0;
mt->logical_depth0 = depth0;
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
- mt->disable_aux_buffers = disable_aux_buffers;
+ mt->disable_aux_buffers = (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) != 0;
exec_list_make_empty(&mt->hiz_map);
/* The cpp is bytes per (1, blockheight)-sized block for compressed
@@ -422,12 +448,15 @@ intel_miptree_create_layout(struct brw_context *brw,
mt->physical_height0 = height0;
mt->physical_depth0 = depth0;
- if (!for_bo &&
+ if (!(layout_flags & MIPTREE_LAYOUT_FOR_BO) &&
_mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
(brw->must_use_separate_stencil ||
(brw->has_separate_stencil &&
intel_miptree_wants_hiz_buffer(brw, mt)))) {
- const bool force_all_slices_at_each_lod = brw->gen == 6;
+ uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
+ if (brw->gen == 6)
+ stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD;
+
mt->stencil_mt = intel_miptree_create(brw,
mt->target,
MESA_FORMAT_S_UINT8,
@@ -436,10 +465,10 @@ intel_miptree_create_layout(struct brw_context *brw,
mt->logical_width0,
mt->logical_height0,
mt->logical_depth0,
- true,
num_samples,
INTEL_MIPTREE_TILING_ANY,
- force_all_slices_at_each_lod);
+ stencil_flags);
+
if (!mt->stencil_mt) {
intel_miptree_release(&mt);
return NULL;
@@ -457,119 +486,36 @@ intel_miptree_create_layout(struct brw_context *brw,
}
}
- if (force_all_slices_at_each_lod)
+ if (layout_flags & MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD)
mt->array_layout = ALL_SLICES_AT_EACH_LOD;
- brw_miptree_layout(brw, mt);
-
- if (mt->disable_aux_buffers)
- assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS);
-
- return mt;
-}
-
-/**
- * \brief Helper function for intel_miptree_create().
- */
-static uint32_t
-intel_miptree_choose_tiling(struct brw_context *brw,
- mesa_format format,
- uint32_t width0,
- uint32_t num_samples,
- enum intel_miptree_tiling_mode requested,
- struct intel_mipmap_tree *mt)
-{
- if (format == MESA_FORMAT_S_UINT8) {
- /* The stencil buffer is W tiled. However, we request from the kernel a
- * non-tiled buffer because the GTT is incapable of W fencing.
- */
- return I915_TILING_NONE;
- }
-
- /* Some usages may want only one type of tiling, like depth miptrees (Y
- * tiled), or temporary BOs for uploading data once (linear).
- */
- switch (requested) {
- case INTEL_MIPTREE_TILING_ANY:
- break;
- case INTEL_MIPTREE_TILING_Y:
- return I915_TILING_Y;
- case INTEL_MIPTREE_TILING_NONE:
- return I915_TILING_NONE;
- }
-
- if (num_samples > 1) {
- /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
- * Surface"):
- *
- * [DevSNB+]: For multi-sample render targets, this field must be
- * 1. MSRTs can only be tiled.
- *
- * Our usual reason for preferring X tiling (fast blits using the
- * blitting engine) doesn't apply to MSAA, since we'll generally be
- * downsampling or upsampling when blitting between the MSAA buffer
- * and another buffer, and the blitting engine doesn't support that.
- * So use Y tiling, since it makes better use of the cache.
- */
- return I915_TILING_Y;
- }
-
- GLenum base_format = _mesa_get_format_base_format(format);
- if (base_format == GL_DEPTH_COMPONENT ||
- base_format == GL_DEPTH_STENCIL_EXT)
- return I915_TILING_Y;
-
- /* 1D textures (and 1D array textures) don't get any benefit from tiling,
- * in fact it leads to a less efficient use of memory space and bandwidth
- * due to tile alignment.
+ /*
+ * Obey HALIGN_16 constraints for Gen8 and Gen9 buffers that are
+ * multisampled or have an AUX buffer attached to them.
+ *
+ * GEN | MSRT | AUX_CCS_* or AUX_MCS
+ * -------------------------------------------
+ * 9 | HALIGN_16 | HALIGN_16
+ * 8 | HALIGN_ANY | HALIGN_16
+ * 7 | ? | ?
+ * 6 | ? | ?
*/
- if (mt->logical_height0 == 1)
- return I915_TILING_NONE;
-
- int minimum_pitch = mt->total_width * mt->cpp;
-
- /* If the width is much smaller than a tile, don't bother tiling. */
- if (minimum_pitch < 64)
- return I915_TILING_NONE;
-
- if (ALIGN(minimum_pitch, 512) >= 32768 ||
- mt->total_width >= 32768 || mt->total_height >= 32768) {
- perf_debug("%dx%d miptree too large to blit, falling back to untiled",
- mt->total_width, mt->total_height);
- return I915_TILING_NONE;
+ if (intel_miptree_is_fast_clear_capable(brw, mt)) {
+ if (brw->gen >= 9 || (brw->gen == 8 && num_samples <= 1))
+ layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
+ } else if (brw->gen >= 9 && num_samples > 1) {
+ layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
+ } else {
+ /* For now, nothing else has this requirement */
+ assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
}
- /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */
- if (brw->gen < 6)
- return I915_TILING_X;
+ brw_miptree_layout(brw, mt, requested, layout_flags);
- /* From the Sandybridge PRM, Volume 1, Part 2, page 32:
- * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX
- * or Linear."
- * 128 bits per pixel translates to 16 bytes per pixel. This is necessary
- * all the way back to 965, but is permitted on Gen7+.
- */
- if (brw->gen < 7 && mt->cpp >= 16)
- return I915_TILING_X;
-
- /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most
- * messages), on p64, under the heading "Surface Vertical Alignment":
- *
- * This field must be set to VALIGN_4 for all tiled Y Render Target
- * surfaces.
- *
- * So if the surface is renderable and uses a vertical alignment of 2,
- * force it to be X tiled. This is somewhat conservative (it's possible
- * that the client won't ever render to this surface), but it's difficult
- * to know that ahead of time. And besides, since we use a vertical
- * alignment of 4 as often as we can, this shouldn't happen very often.
- */
- if (brw->gen == 7 && mt->align_h == 2 &&
- brw->format_supported_as_render_target[format]) {
- return I915_TILING_X;
- }
+ if (mt->disable_aux_buffers)
+ assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS);
- return I915_TILING_Y | I915_TILING_X;
+ return mt;
}
@@ -615,33 +561,33 @@ intel_lower_compressed_format(struct brw_context *brw, mesa_format format)
struct intel_mipmap_tree *
intel_miptree_create(struct brw_context *brw,
- GLenum target,
- mesa_format format,
- GLuint first_level,
- GLuint last_level,
- GLuint width0,
- GLuint height0,
- GLuint depth0,
- bool expect_accelerated_upload,
+ GLenum target,
+ mesa_format format,
+ GLuint first_level,
+ GLuint last_level,
+ GLuint width0,
+ GLuint height0,
+ GLuint depth0,
GLuint num_samples,
enum intel_miptree_tiling_mode requested_tiling,
- bool force_all_slices_at_each_lod)
+ uint32_t layout_flags)
{
struct intel_mipmap_tree *mt;
mesa_format tex_format = format;
mesa_format etc_format = MESA_FORMAT_NONE;
GLuint total_width, total_height;
+ uint32_t alloc_flags = 0;
format = intel_lower_compressed_format(brw, format);
etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
+ assert((layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) == 0);
+ assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0);
mt = intel_miptree_create_layout(brw, target, format,
- first_level, last_level, width0,
- height0, depth0,
- false, num_samples,
- force_all_slices_at_each_lod,
- false /*disable_aux_buffers*/);
+ first_level, last_level, width0,
+ height0, depth0, num_samples,
+ requested_tiling, layout_flags);
/*
* pitch == 0 || height == 0 indicates the null texture
*/
@@ -659,25 +605,21 @@ intel_miptree_create(struct brw_context *brw,
total_height = ALIGN(total_height, 64);
}
- uint32_t tiling = intel_miptree_choose_tiling(brw, format, width0,
- num_samples, requested_tiling,
- mt);
bool y_or_x = false;
- if (tiling == (I915_TILING_Y | I915_TILING_X)) {
+ if (mt->tiling == (I915_TILING_Y | I915_TILING_X)) {
y_or_x = true;
mt->tiling = I915_TILING_Y;
- } else {
- mt->tiling = tiling;
}
+ if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD)
+ alloc_flags |= BO_ALLOC_FOR_RENDER;
+
unsigned long pitch;
+ mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", total_width,
+ total_height, mt->cpp, &mt->tiling,
+ &pitch, alloc_flags);
mt->etc_format = etc_format;
- mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
- total_width, total_height, mt->cpp,
- &mt->tiling, &pitch,
- (expect_accelerated_upload ?
- BO_ALLOC_FOR_RENDER : 0));
mt->pitch = pitch;
/* If the BO is too large to fit in the aperture, we need to use the
@@ -691,10 +633,8 @@ intel_miptree_create(struct brw_context *brw,
mt->tiling = I915_TILING_X;
drm_intel_bo_unreference(mt->bo);
mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
- total_width, total_height, mt->cpp,
- &mt->tiling, &pitch,
- (expect_accelerated_upload ?
- BO_ALLOC_FOR_RENDER : 0));
+ total_width, total_height, mt->cpp,
+ &mt->tiling, &pitch, alloc_flags);
mt->pitch = pitch;
}
@@ -707,6 +647,7 @@ intel_miptree_create(struct brw_context *brw,
if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
+ assert(mt->num_samples > 1);
if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) {
intel_miptree_release(&mt);
return NULL;
@@ -718,8 +659,11 @@ intel_miptree_create(struct brw_context *brw,
* Allocation of the MCS miptree will be deferred until the first fast
* clear actually occurs.
*/
- if (intel_is_non_msrt_mcs_buffer_supported(brw, mt))
+ if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) &&
+ intel_miptree_is_fast_clear_capable(brw, mt)) {
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
+ assert(brw->gen < 8 || mt->align_w == 16 || num_samples <= 1);
+ }
return mt;
}
@@ -733,7 +677,7 @@ intel_miptree_create_for_bo(struct brw_context *brw,
uint32_t height,
uint32_t depth,
int pitch,
- bool disable_aux_buffers)
+ uint32_t layout_flags)
{
struct intel_mipmap_tree *mt;
uint32_t tiling, swizzle;
@@ -754,11 +698,18 @@ intel_miptree_create_for_bo(struct brw_context *brw,
target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
+ /* The 'requested' parameter of intel_miptree_create_layout() is only
+ * relevant for non-BO miptrees; the tiling for a BO miptree has already
+ * been computed above. The tiling requested below
+ * (INTEL_MIPTREE_TILING_ANY) is therefore just a placeholder and does
+ * not change the miptree's tiling format.
+ */
+ layout_flags |= MIPTREE_LAYOUT_FOR_BO;
mt = intel_miptree_create_layout(brw, target, format,
0, 0,
- width, height, depth,
- true, 0, false,
- disable_aux_buffers);
+ width, height, depth, 0,
+ INTEL_MIPTREE_TILING_ANY,
+ layout_flags);
if (!mt)
return NULL;
@@ -808,7 +759,7 @@ intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
height,
1,
pitch,
- false);
+ 0);
if (!singlesample_mt)
goto fail;
@@ -817,7 +768,8 @@ intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
* Allocation of the MCS miptree will be deferred until the first fast
* clear actually occurs.
*/
- if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt))
+ if (intel_tiling_supports_non_msrt_mcs(intel, singlesample_mt->tiling) &&
+ intel_miptree_is_fast_clear_capable(intel, singlesample_mt))
singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
if (num_samples == 0) {
@@ -866,8 +818,9 @@ intel_miptree_create_for_renderbuffer(struct brw_context *brw,
GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
mt = intel_miptree_create(brw, target, format, 0, 0,
- width, height, depth, true, num_samples,
- INTEL_MIPTREE_TILING_ANY, false);
+ width, height, depth, num_samples,
+ INTEL_MIPTREE_TILING_ANY,
+ MIPTREE_LAYOUT_ACCELERATED_UPLOAD);
if (!mt)
goto fail;
@@ -1258,8 +1211,10 @@ intel_miptree_copy_slice(struct brw_context *brw,
assert(src_mt->format == dst_mt->format);
if (dst_mt->compressed) {
- height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h;
- width = ALIGN(width, dst_mt->align_w);
+ unsigned int i, j;
+ _mesa_get_format_block_size(dst_mt->format, &i, &j);
+ height = ALIGN(height, j) / j;
+ width = ALIGN(width, i);
}
/* If it's a packed depth/stencil buffer with separate stencil, the blit
@@ -1378,10 +1333,9 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
mt->logical_width0,
mt->logical_height0,
mt->logical_depth0,
- true,
0 /* num_samples */,
INTEL_MIPTREE_TILING_Y,
- false);
+ MIPTREE_LAYOUT_ACCELERATED_UPLOAD);
/* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
*
@@ -1429,6 +1383,9 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
unsigned mcs_height =
ALIGN(mt->logical_height0, height_divisor) / height_divisor;
assert(mt->logical_depth0 == 1);
+ uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
+ if (brw->gen >= 8)
+ layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
mt->mcs_mt = intel_miptree_create(brw,
mt->target,
format,
@@ -1437,10 +1394,9 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
mcs_width,
mcs_height,
mt->logical_depth0,
- true,
0 /* num_samples */,
INTEL_MIPTREE_TILING_Y,
- false);
+ layout_flags);
return mt->mcs_mt;
}
@@ -1682,7 +1638,10 @@ intel_hiz_miptree_buf_create(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
- const bool force_all_slices_at_each_lod = brw->gen == 6;
+ uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
+
+ if (brw->gen == 6)
+ layout_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD;
if (!buf)
return NULL;
@@ -1695,10 +1654,9 @@ intel_hiz_miptree_buf_create(struct brw_context *brw,
mt->logical_width0,
mt->logical_height0,
mt->logical_depth0,
- true,
mt->num_samples,
INTEL_MIPTREE_TILING_ANY,
- force_all_slices_at_each_lod);
+ layout_flags);
if (!buf->mt) {
free(buf);
return NULL;
@@ -2128,9 +2086,8 @@ intel_miptree_map_blit(struct brw_context *brw,
map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
0, 0,
map->w, map->h, 1,
- false, 0,
- INTEL_MIPTREE_TILING_NONE,
- false);
+ 0, INTEL_MIPTREE_TILING_NONE, 0);
+
if (!map->mt) {
fprintf(stderr, "Failed to allocate blit temporary\n");
goto fail;
@@ -2675,7 +2632,9 @@ intel_miptree_map(struct brw_context *brw,
} else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) {
intel_miptree_map_blit(brw, mt, map, level, slice);
#if defined(USE_SSE41)
- } else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed && cpu_has_sse4_1) {
+ } else if (!(mode & GL_MAP_WRITE_BIT) &&
+ !mt->compressed && cpu_has_sse4_1 &&
+ (mt->pitch % 16 == 0)) {
intel_miptree_map_movntdqa(brw, mt, map, level, slice);
#endif
} else {
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 8b42e4adb79..bde6daa4e2d 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -330,6 +330,13 @@ struct intel_miptree_aux_buffer
struct intel_mipmap_tree *mt; /**< hiz miptree used with Gen6 */
};
+/* Tile resource modes */
+enum intel_miptree_tr_mode {
+ INTEL_MIPTREE_TRMODE_NONE,
+ INTEL_MIPTREE_TRMODE_YF,
+ INTEL_MIPTREE_TRMODE_YS
+};
+
struct intel_mipmap_tree
{
/** Buffer object containing the pixel data. */
@@ -338,6 +345,7 @@ struct intel_mipmap_tree
uint32_t pitch; /**< pitch in bytes. */
uint32_t tiling; /**< One of the I915_TILING_* flags */
+ enum intel_miptree_tr_mode tr_mode;
/* Effectively the key:
*/
@@ -514,19 +522,27 @@ enum intel_miptree_tiling_mode {
INTEL_MIPTREE_TILING_NONE,
};
-bool
-intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
- struct intel_mipmap_tree *mt);
-
void
intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned *width_px, unsigned *height);
-
+bool
+intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling);
+bool
+intel_miptree_is_fast_clear_capable(struct brw_context *brw,
+ struct intel_mipmap_tree *mt);
bool
intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
struct intel_mipmap_tree *mt);
+enum {
+ MIPTREE_LAYOUT_ACCELERATED_UPLOAD = 1 << 0,
+ MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD = 1 << 1,
+ MIPTREE_LAYOUT_FOR_BO = 1 << 2,
+ MIPTREE_LAYOUT_DISABLE_AUX = 1 << 3,
+ MIPTREE_LAYOUT_FORCE_HALIGN16 = 1 << 4,
+};
+
struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw,
GLenum target,
mesa_format format,
@@ -535,10 +551,9 @@ struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw,
GLuint width0,
GLuint height0,
GLuint depth0,
- bool expect_accelerated_upload,
GLuint num_samples,
enum intel_miptree_tiling_mode,
- bool force_all_slices_at_each_lod);
+ uint32_t flags);
struct intel_mipmap_tree *
intel_miptree_create_for_bo(struct brw_context *brw,
@@ -549,7 +564,7 @@ intel_miptree_create_for_bo(struct brw_context *brw,
uint32_t height,
uint32_t depth,
int pitch,
- bool disable_aux_buffers);
+ uint32_t layout_flags);
void
intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
@@ -753,7 +768,11 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw,
const struct intel_mipmap_tree *mt,
unsigned level);
-void brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt);
+void
+brw_miptree_layout(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ enum intel_miptree_tiling_mode requested,
+ uint32_t layout_flags);
void *intel_miptree_map_raw(struct brw_context *brw,
struct intel_mipmap_tree *mt);
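Editor's note: a minimal sketch of the converted calling convention (illustrative; the format and dimensions are placeholders). What used to be expressed with the expect_accelerated_upload and force_all_slices_at_each_lod booleans is now a single layout_flags word:

    struct intel_mipmap_tree *mt =
       intel_miptree_create(brw, GL_TEXTURE_2D, MESA_FORMAT_B8G8R8A8_UNORM,
                            0, 0,             /* first_level, last_level */
                            width, height, 1, /* width0, height0, depth0 */
                            0,                /* num_samples */
                            INTEL_MIPTREE_TILING_ANY,
                            MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
                            MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD);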
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_draw.c b/src/mesa/drivers/dri/i965/intel_pixel_draw.c
index 4ecefc8cf54..6c6bd8629ac 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_draw.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_draw.c
@@ -28,6 +28,7 @@
#include "main/glheader.h"
#include "main/enums.h"
#include "main/image.h"
+#include "main/glformats.h"
#include "main/mtypes.h"
#include "main/condrender.h"
#include "main/fbobject.h"
@@ -76,8 +77,16 @@ do_blit_drawpixels(struct gl_context * ctx,
struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
- if (!_mesa_format_matches_format_and_type(irb->mt->format, format, type,
- false)) {
+ mesa_format src_format = _mesa_format_from_format_and_type(format, type);
+ if (_mesa_format_is_mesa_array_format(src_format))
+ src_format = _mesa_format_from_array_format(src_format);
+ mesa_format dst_format = irb->mt->format;
+
+ /* We can safely discard sRGB encode/decode for the DrawPixels interface */
+ src_format = _mesa_get_srgb_format_linear(src_format);
+ dst_format = _mesa_get_srgb_format_linear(dst_format);
+
+ if (!intel_miptree_blit_compatible_formats(src_format, dst_format)) {
DBG("%s: bad format for blit\n", __func__);
return false;
}
@@ -112,7 +121,7 @@ do_blit_drawpixels(struct gl_context * ctx,
src_offset,
width, height, 1,
src_stride,
- false /*disable_aux_buffers*/);
+ 0);
if (!pbo_mt)
return false;
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index d3ca38b6ecd..30380570d62 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -226,8 +226,30 @@ intelReadPixels(struct gl_context * ctx,
if (_mesa_is_bufferobj(pack->BufferObj)) {
if (_mesa_meta_pbo_GetTexSubImage(ctx, 2, NULL, x, y, 0, width, height, 1,
- format, type, pixels, pack))
+ format, type, pixels, pack)) {
+ /* _mesa_meta_pbo_GetTexSubImage() implements PBO transfers by
+ * binding the user-provided BO as a fake framebuffer and rendering
+ * to it. This breaks the invariant of the GL that nothing is able
+ * to render to a BO, causing nondeterministic corruption issues
+ * because the render cache is not coherent with a number of other
+ * caches that the BO could potentially be bound to afterwards.
+ *
+ * This could be solved in the same way that we guarantee texture
+ * coherency after a texture is attached to a framebuffer and
+ * rendered to, but that would involve checking *all* BOs bound to
+ * the pipeline for the case we need to emit a cache flush due to
+ * previous rendering to any of them -- Including vertex, index,
+ * uniform, atomic counter, shader image, transform feedback,
+ * indirect draw buffers, etc.
+ *
+ * That would increase the per-draw call overhead even though it's
+ * very unlikely that any of the BOs bound to the pipeline has been
+ * rendered to via a PBO at any point, so it seems better to just
+ * flush here unconditionally.
+ */
+ intel_batchbuffer_emit_mi_flush(brw);
return;
+ }
perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
}
diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
index 488fb5b98f8..bd14e189da3 100644
--- a/src/mesa/drivers/dri/i965/intel_reg.h
+++ b/src/mesa/drivers/dri/i965/intel_reg.h
@@ -48,6 +48,20 @@
#define GEN7_MI_LOAD_REGISTER_MEM (CMD_MI | (0x29 << 23))
# define MI_LOAD_REGISTER_MEM_USE_GGTT (1 << 22)
+/* Manipulate the predicate bit based on some register values. Only on Gen7+ */
+#define GEN7_MI_PREDICATE (CMD_MI | (0xC << 23))
+# define MI_PREDICATE_LOADOP_KEEP (0 << 6)
+# define MI_PREDICATE_LOADOP_LOAD (2 << 6)
+# define MI_PREDICATE_LOADOP_LOADINV (3 << 6)
+# define MI_PREDICATE_COMBINEOP_SET (0 << 3)
+# define MI_PREDICATE_COMBINEOP_AND (1 << 3)
+# define MI_PREDICATE_COMBINEOP_OR (2 << 3)
+# define MI_PREDICATE_COMBINEOP_XOR (3 << 3)
+# define MI_PREDICATE_COMPAREOP_TRUE (0 << 0)
+# define MI_PREDICATE_COMPAREOP_FALSE (1 << 0)
+# define MI_PREDICATE_COMPAREOP_SRCS_EQUAL (2 << 0)
+# define MI_PREDICATE_COMPAREOP_DELTAS_EQUAL (3 << 0)
+
/** @{
*
* PIPE_CONTROL operation, a combination MI_FLUSH and register write with
@@ -69,6 +83,7 @@
#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1 << 10) /* GM45+ only */
#define PIPE_CONTROL_ISP_DIS (1 << 9)
#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8)
+#define PIPE_CONTROL_FLUSH_ENABLE (1 << 7) /* Gen7+ only */
/* GT */
#define PIPE_CONTROL_DATA_CACHE_INVALIDATE (1 << 5)
#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4)
@@ -147,3 +162,11 @@
# define GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC (1 << 1)
# define GEN8_HIZ_PMA_MASK_BITS \
((GEN8_HIZ_NP_PMA_FIX_ENABLE | GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE) << 16)
+
+/* Predicate registers */
+#define MI_PREDICATE_SRC0 0x2400
+#define MI_PREDICATE_SRC1 0x2408
+#define MI_PREDICATE_DATA 0x2410
+#define MI_PREDICATE_RESULT 0x2418
+#define MI_PREDICATE_RESULT_1 0x241C
+#define MI_PREDICATE_RESULT_2 0x2214
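Editor's note: a hedged sketch of how these predicate bits could be combined (illustrative, not part of the patch; it assumes MI_PREDICATE_SRC0/SRC1 were already loaded, for example with brw_load_register_mem64() from earlier in this series):

    /* Compute the predicate result as !(SRC0 == SRC1), e.g. "some samples
     * passed" when SRC0/SRC1 hold a query counter sampled at begin/end.
     */
    BEGIN_BATCH(1);
    OUT_BATCH(GEN7_MI_PREDICATE |
              MI_PREDICATE_LOADOP_LOADINV |
              MI_PREDICATE_COMBINEOP_SET |
              MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
    ADVANCE_BATCH();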
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 4860a160ee9..de14696bd76 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -39,6 +39,7 @@
#include "swrast/s_renderbuffer.h"
#include "util/ralloc.h"
#include "brw_shader.h"
+#include "glsl/nir/nir.h"
#include "utils.h"
#include "xmlpool.h"
@@ -1372,6 +1373,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
if (!intelScreen->devinfo)
return false;
+ brw_process_intel_debug_variable(intelScreen);
+
intelScreen->hw_must_use_separate_stencil = intelScreen->devinfo->gen >= 7;
intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
@@ -1407,6 +1410,13 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
(ret != -1 || errno != EINVAL);
}
+ struct drm_i915_getparam getparam;
+ getparam.param = I915_PARAM_CMD_PARSER_VERSION;
+ getparam.value = &intelScreen->cmd_parser_version;
+ const int ret = drmIoctl(psp->fd, DRM_IOCTL_I915_GETPARAM, &getparam);
+ if (ret == -1)
+ intelScreen->cmd_parser_version = 0;
+
psp->extensions = !intelScreen->has_context_reset_notification
? intelScreenExtensions : intelRobustScreenExtensions;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index e7a14903d6e..742b3d30eee 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -72,7 +72,13 @@ struct intel_screen
* Configuration cache with default values for all contexts
*/
driOptionCache optionCache;
-};
+
+ /**
+ * Version of the command parser reported by the
+ * I915_PARAM_CMD_PARSER_VERSION parameter
+ */
+ int cmd_parser_version;
+};
extern void intelDestroyContext(__DRIcontext * driContextPriv);
diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c
index 777a682ad21..b0181ad1d75 100644
--- a/src/mesa/drivers/dri/i965/intel_tex.c
+++ b/src/mesa/drivers/dri/i965/intel_tex.c
@@ -93,7 +93,7 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx,
} else {
intel_image->mt = intel_miptree_create_for_teximage(brw, intel_texobj,
intel_image,
- false);
+ 0);
/* Even if the object currently has a mipmap tree associated
* with it, this one is a more likely candidate to represent the
@@ -144,10 +144,8 @@ intel_alloc_texture_storage(struct gl_context *ctx,
first_image->TexFormat,
0, levels - 1,
width, height, depth,
- false, /* expect_accelerated */
num_samples,
- INTEL_MIPTREE_TILING_ANY,
- false);
+ INTEL_MIPTREE_TILING_ANY, 0);
if (intel_texobj->mt == NULL) {
return false;
@@ -341,7 +339,7 @@ intel_set_texture_storage_for_buffer_object(struct gl_context *ctx,
buffer_offset,
image->Width, image->Height, image->Depth,
row_stride,
- false /*disable_aux_buffers*/);
+ 0);
if (!intel_texobj->mt)
return false;
diff --git a/src/mesa/drivers/dri/i965/intel_tex.h b/src/mesa/drivers/dri/i965/intel_tex.h
index f048e846d55..402a3891ecd 100644
--- a/src/mesa/drivers/dri/i965/intel_tex.h
+++ b/src/mesa/drivers/dri/i965/intel_tex.h
@@ -53,7 +53,7 @@ struct intel_mipmap_tree *
intel_miptree_create_for_teximage(struct brw_context *brw,
struct intel_texture_object *intelObj,
struct intel_texture_image *intelImage,
- bool expect_accelerated_upload);
+ uint32_t layout_flags);
GLuint intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit);
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 7952ee5ad88..ebe84b664d4 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -36,7 +36,7 @@ struct intel_mipmap_tree *
intel_miptree_create_for_teximage(struct brw_context *brw,
struct intel_texture_object *intelObj,
struct intel_texture_image *intelImage,
- bool expect_accelerated_upload)
+ uint32_t layout_flags)
{
GLuint lastLevel;
int width, height, depth;
@@ -79,10 +79,9 @@ intel_miptree_create_for_teximage(struct brw_context *brw,
width,
height,
depth,
- expect_accelerated_upload,
intelImage->base.Base.NumSamples,
INTEL_MIPTREE_TILING_ANY,
- false);
+ layout_flags);
}
static void
@@ -155,7 +154,7 @@ intel_set_texture_image_bo(struct gl_context *ctx,
GLuint width, GLuint height,
GLuint pitch,
GLuint tile_x, GLuint tile_y,
- bool disable_aux_buffers)
+ uint32_t layout_flags)
{
struct brw_context *brw = brw_context(ctx);
struct intel_texture_image *intel_image = intel_texture_image(image);
@@ -171,7 +170,7 @@ intel_set_texture_image_bo(struct gl_context *ctx,
intel_image->mt = intel_miptree_create_for_bo(brw, bo, image->TexFormat,
0, width, height, 1, pitch,
- disable_aux_buffers);
+ layout_flags);
if (intel_image->mt == NULL)
return;
intel_image->mt->target = target;
@@ -255,8 +254,7 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
rb->Base.Base.Width,
rb->Base.Base.Height,
rb->mt->pitch,
- 0, 0,
- false /*disable_aux_buffers*/);
+ 0, 0, 0);
_mesa_unlock_texture(&brw->ctx, texObj);
}
@@ -349,7 +347,7 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target,
image->width, image->height,
image->pitch,
image->tile_x, image->tile_y,
- true /*disable_aux_buffers*/);
+ MIPTREE_LAYOUT_DISABLE_AUX);
}
/**
@@ -486,8 +484,15 @@ intel_get_tex_image(struct gl_context *ctx,
if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage, 0, 0, 0,
texImage->Width, texImage->Height,
texImage->Depth, format, type,
- pixels, &ctx->Pack))
+ pixels, &ctx->Pack)) {
+ /* Flush to guarantee coherency between the render cache and other
+ * caches the PBO could potentially be bound to after this point.
+ * See the related comment in intelReadPixels() for a more detailed
+ * explanation.
+ */
+ intel_batchbuffer_emit_mi_flush(brw);
return;
+ }
perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
}
diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c
index 1d827683b99..4991c2997ef 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c
@@ -47,8 +47,10 @@ intel_update_max_level(struct intel_texture_object *intelObj,
{
struct gl_texture_object *tObj = &intelObj->base;
- if (sampler->MinFilter == GL_NEAREST ||
- sampler->MinFilter == GL_LINEAR) {
+ if (!tObj->_MipmapComplete ||
+ (tObj->_RenderToTexture &&
+ (sampler->MinFilter == GL_NEAREST ||
+ sampler->MinFilter == GL_LINEAR))) {
intelObj->_MaxLevel = tObj->BaseLevel;
} else {
intelObj->_MaxLevel = tObj->_MaxLevel;
@@ -142,10 +144,9 @@ intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit)
width,
height,
depth,
- true,
0 /* num_samples */,
INTEL_MIPTREE_TILING_ANY,
- false);
+ MIPTREE_LAYOUT_ACCELERATED_UPLOAD);
if (!intelObj->mt)
return false;
}
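Note: the hunks above replace the separate expect_accelerated_upload and disable_aux_buffers booleans with a single uint32_t layout_flags bit mask. A minimal sketch of the idiom follows; only MIPTREE_LAYOUT_ACCELERATED_UPLOAD and MIPTREE_LAYOUT_DISABLE_AUX appear in this diff, the numeric values are placeholders, and the real definitions are assumed to live alongside the miptree code rather than here.

#include <stdint.h>

/* Placeholder values -- the actual flag definitions are elsewhere. */
#define MIPTREE_LAYOUT_ACCELERATED_UPLOAD  (1 << 0)
#define MIPTREE_LAYOUT_DISABLE_AUX         (1 << 1)

/* Callers OR together the behaviours they want and pass 0 for none: */
uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
                        MIPTREE_LAYOUT_DISABLE_AUX;

if (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) {
   /* skip allocation of auxiliary (HiZ/MCS) surfaces */
}

Using one flags word lets future layout options be added without growing every miptree-creation signature again.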
diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
index 206a76e9242..8010fb4f610 100644
--- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
@@ -26,11 +26,13 @@
#include "brw_cfg.h"
#include "program/program.h"
+using namespace brw;
+
class cmod_propagation_test : public ::testing::Test {
virtual void SetUp();
public:
- struct brw_context *brw;
+ struct brw_compiler *compiler;
struct brw_device_info *devinfo;
struct gl_context *ctx;
struct brw_wm_prog_data *prog_data;
@@ -42,30 +44,31 @@ public:
class cmod_propagation_fs_visitor : public fs_visitor
{
public:
- cmod_propagation_fs_visitor(struct brw_context *brw,
+ cmod_propagation_fs_visitor(struct brw_compiler *compiler,
struct brw_wm_prog_data *prog_data,
struct gl_shader_program *shader_prog)
- : fs_visitor(brw, NULL, NULL, prog_data, shader_prog, NULL, 8) {}
+ : fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL,
+ &prog_data->base, shader_prog,
+ (struct gl_program *) NULL, 8, -1) {}
};
void cmod_propagation_test::SetUp()
{
- brw = (struct brw_context *)calloc(1, sizeof(*brw));
- devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw));
- brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen));
- brw->intelScreen->devinfo = devinfo;
- ctx = &brw->ctx;
+ ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
+ compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
+ devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+ compiler->devinfo = devinfo;
fp = ralloc(NULL, struct brw_fragment_program);
prog_data = ralloc(NULL, struct brw_wm_prog_data);
shader_prog = ralloc(NULL, struct gl_shader_program);
- v = new cmod_propagation_fs_visitor(brw, prog_data, shader_prog);
+ v = new cmod_propagation_fs_visitor(compiler, prog_data, shader_prog);
_mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0);
- brw->gen = devinfo->gen = 4;
+ devinfo->gen = 4;
}
static fs_inst *
@@ -100,13 +103,13 @@ cmod_propagation(fs_visitor *v)
TEST_F(cmod_propagation_test, basic)
{
+ const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg zero(0.0f);
- v->emit(BRW_OPCODE_ADD, dest, src0, src1);
- v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, zero)
- ->conditional_mod = BRW_CONDITIONAL_GE;
+ bld.ADD(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
/* = Before =
*
@@ -132,13 +135,13 @@ TEST_F(cmod_propagation_test, basic)
TEST_F(cmod_propagation_test, cmp_nonzero)
{
+ const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg nonzero(1.0f);
- v->emit(BRW_OPCODE_ADD, dest, src0, src1);
- v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, nonzero)
- ->conditional_mod = BRW_CONDITIONAL_GE;
+ bld.ADD(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), dest, nonzero, BRW_CONDITIONAL_GE);
/* = Before =
*
@@ -165,12 +168,12 @@ TEST_F(cmod_propagation_test, cmp_nonzero)
TEST_F(cmod_propagation_test, non_cmod_instruction)
{
+ const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::uint_type);
fs_reg src0 = v->vgrf(glsl_type::uint_type);
fs_reg zero(0u);
- v->emit(BRW_OPCODE_FBL, dest, src0);
- v->emit(BRW_OPCODE_CMP, v->reg_null_ud, dest, zero)
- ->conditional_mod = BRW_CONDITIONAL_GE;
+ bld.FBL(dest, src0);
+ bld.CMP(bld.null_reg_ud(), dest, zero, BRW_CONDITIONAL_GE);
/* = Before =
*
@@ -197,16 +200,15 @@ TEST_F(cmod_propagation_test, non_cmod_instruction)
TEST_F(cmod_propagation_test, intervening_flag_write)
{
+ const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg src2 = v->vgrf(glsl_type::float_type);
fs_reg zero(0.0f);
- v->emit(BRW_OPCODE_ADD, dest, src0, src1);
- v->emit(BRW_OPCODE_CMP, v->reg_null_f, src2, zero)
- ->conditional_mod = BRW_CONDITIONAL_GE;
- v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, zero)
- ->conditional_mod = BRW_CONDITIONAL_GE;
+ bld.ADD(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE);
+ bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
/* = Before =
*
@@ -236,17 +238,16 @@ TEST_F(cmod_propagation_test, intervening_flag_write)
TEST_F(cmod_propagation_test, intervening_flag_read)
{
+ const fs_builder &bld = v->bld;
fs_reg dest0 = v->vgrf(glsl_type::float_type);
fs_reg dest1 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg src2 = v->vgrf(glsl_type::float_type);
fs_reg zero(0.0f);
- v->emit(BRW_OPCODE_ADD, dest0, src0, src1);
- v->emit(BRW_OPCODE_SEL, dest1, src2, zero)
- ->predicate = BRW_PREDICATE_NORMAL;
- v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest0, zero)
- ->conditional_mod = BRW_CONDITIONAL_GE;
+ bld.ADD(dest0, src0, src1);
+ set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
+ bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE);
/* = Before =
*
@@ -276,16 +277,16 @@ TEST_F(cmod_propagation_test, intervening_flag_read)
TEST_F(cmod_propagation_test, intervening_dest_write)
{
+ const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::vec4_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg src2 = v->vgrf(glsl_type::vec2_type);
fs_reg zero(0.0f);
- v->emit(BRW_OPCODE_ADD, offset(dest, 2), src0, src1);
- v->emit(SHADER_OPCODE_TEX, dest, src2)
+ bld.ADD(offset(dest, 2), src0, src1);
+ bld.emit(SHADER_OPCODE_TEX, dest, src2)
->regs_written = 4;
- v->emit(BRW_OPCODE_CMP, v->reg_null_f, offset(dest, 2), zero)
- ->conditional_mod = BRW_CONDITIONAL_GE;
+ bld.CMP(bld.null_reg_f(), offset(dest, 2), zero, BRW_CONDITIONAL_GE);
/* = Before =
*
@@ -316,18 +317,16 @@ TEST_F(cmod_propagation_test, intervening_dest_write)
TEST_F(cmod_propagation_test, intervening_flag_read_same_value)
{
+ const fs_builder &bld = v->bld;
fs_reg dest0 = v->vgrf(glsl_type::float_type);
fs_reg dest1 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg src2 = v->vgrf(glsl_type::float_type);
fs_reg zero(0.0f);
- v->emit(BRW_OPCODE_ADD, dest0, src0, src1)
- ->conditional_mod = BRW_CONDITIONAL_GE;
- v->emit(BRW_OPCODE_SEL, dest1, src2, zero)
- ->predicate = BRW_PREDICATE_NORMAL;
- v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest0, zero)
- ->conditional_mod = BRW_CONDITIONAL_GE;
+ set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1));
+ set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
+ bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE);
/* = Before =
*
@@ -357,14 +356,14 @@ TEST_F(cmod_propagation_test, intervening_flag_read_same_value)
TEST_F(cmod_propagation_test, negate)
{
+ const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg zero(0.0f);
- v->emit(BRW_OPCODE_ADD, dest, src0, src1);
+ bld.ADD(dest, src0, src1);
dest.negate = true;
- v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, zero)
- ->conditional_mod = BRW_CONDITIONAL_GE;
+ bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
/* = Before =
*
@@ -390,13 +389,13 @@ TEST_F(cmod_propagation_test, negate)
TEST_F(cmod_propagation_test, movnz)
{
+ const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
- v->emit(BRW_OPCODE_CMP, dest, src0, src1)
- ->conditional_mod = BRW_CONDITIONAL_GE;
- v->emit(BRW_OPCODE_MOV, v->reg_null_f, dest)
- ->conditional_mod = BRW_CONDITIONAL_NZ;
+ bld.CMP(dest, src0, src1, BRW_CONDITIONAL_GE);
+ set_condmod(BRW_CONDITIONAL_NZ,
+ bld.MOV(bld.null_reg_f(), dest));
/* = Before =
*
@@ -422,14 +421,14 @@ TEST_F(cmod_propagation_test, movnz)
TEST_F(cmod_propagation_test, different_types_cmod_with_zero)
{
+ const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::int_type);
fs_reg src0 = v->vgrf(glsl_type::int_type);
fs_reg src1 = v->vgrf(glsl_type::int_type);
fs_reg zero(0.0f);
- v->emit(BRW_OPCODE_ADD, dest, src0, src1);
- v->emit(BRW_OPCODE_CMP, v->reg_null_f, retype(dest, BRW_REGISTER_TYPE_F),
- zero)
- ->conditional_mod = BRW_CONDITIONAL_GE;
+ bld.ADD(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), retype(dest, BRW_REGISTER_TYPE_F), zero,
+ BRW_CONDITIONAL_GE);
/* = Before =
*
@@ -456,15 +455,15 @@ TEST_F(cmod_propagation_test, different_types_cmod_with_zero)
TEST_F(cmod_propagation_test, andnz_one)
{
+ const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::int_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg zero(0.0f);
fs_reg one(1);
- v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero)
- ->conditional_mod = BRW_CONDITIONAL_L;
- v->emit(BRW_OPCODE_AND, v->reg_null_d, dest, one)
- ->conditional_mod = BRW_CONDITIONAL_NZ;
+ bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
+ set_condmod(BRW_CONDITIONAL_NZ,
+ bld.AND(bld.null_reg_d(), dest, one));
/* = Before =
* 0: cmp.l.f0(8) dest:F src0:F 0F
@@ -491,15 +490,15 @@ TEST_F(cmod_propagation_test, andnz_one)
TEST_F(cmod_propagation_test, andnz_non_one)
{
+ const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::int_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg zero(0.0f);
fs_reg nonone(38);
- v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero)
- ->conditional_mod = BRW_CONDITIONAL_L;
- v->emit(BRW_OPCODE_AND, v->reg_null_d, dest, nonone)
- ->conditional_mod = BRW_CONDITIONAL_NZ;
+ bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
+ set_condmod(BRW_CONDITIONAL_NZ,
+ bld.AND(bld.null_reg_d(), dest, nonone));
/* = Before =
* 0: cmp.l.f0(8) dest:F src0:F 0F
@@ -526,15 +525,15 @@ TEST_F(cmod_propagation_test, andnz_non_one)
TEST_F(cmod_propagation_test, andz_one)
{
+ const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::int_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg zero(0.0f);
fs_reg one(1);
- v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero)
- ->conditional_mod = BRW_CONDITIONAL_L;
- v->emit(BRW_OPCODE_AND, v->reg_null_d, dest, one)
- ->conditional_mod = BRW_CONDITIONAL_Z;
+ bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
+ set_condmod(BRW_CONDITIONAL_Z,
+ bld.AND(bld.null_reg_d(), dest, one));
/* = Before =
* 0: cmp.l.f0(8) dest:F src0:F 0F
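Note: these unit tests now drive the fs_builder interface (bld.ADD, bld.CMP, set_condmod, set_predicate). For readers unfamiliar with the pass they exercise, a rough sketch of the transformation in the basic case, reconstructed from the tests' own Before/After comments (instruction syntax illustrative):

/* = Before =                             = After =
 * 0: add(8)        dest  src0  src1      0: add.ge.f0(8)  dest  src0  src1
 * 1: cmp.ge.f0(8)  null  dest  0.0f
 *
 * The CMP against zero is folded into the producing ADD's conditional
 * modifier, provided nothing between the two instructions writes the
 * destination or the flag register.
 */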
diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
index 4c91af3ea8d..3ef0cb319eb 100644
--- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
@@ -26,11 +26,13 @@
#include "brw_cfg.h"
#include "program/program.h"
+using namespace brw;
+
class saturate_propagation_test : public ::testing::Test {
virtual void SetUp();
public:
- struct brw_context *brw;
+ struct brw_compiler *compiler;
struct brw_device_info *devinfo;
struct gl_context *ctx;
struct brw_wm_prog_data *prog_data;
@@ -42,30 +44,31 @@ public:
class saturate_propagation_fs_visitor : public fs_visitor
{
public:
- saturate_propagation_fs_visitor(struct brw_context *brw,
+ saturate_propagation_fs_visitor(struct brw_compiler *compiler,
struct brw_wm_prog_data *prog_data,
struct gl_shader_program *shader_prog)
- : fs_visitor(brw, NULL, NULL, prog_data, shader_prog, NULL, 8) {}
+ : fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL,
+ &prog_data->base, shader_prog,
+ (struct gl_program *) NULL, 8, -1) {}
};
void saturate_propagation_test::SetUp()
{
- brw = (struct brw_context *)calloc(1, sizeof(*brw));
- devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw));
- brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen));
- brw->intelScreen->devinfo = devinfo;
- ctx = &brw->ctx;
+ ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
+ compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
+ devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+ compiler->devinfo = devinfo;
fp = ralloc(NULL, struct brw_fragment_program);
prog_data = ralloc(NULL, struct brw_wm_prog_data);
shader_prog = ralloc(NULL, struct gl_shader_program);
- v = new saturate_propagation_fs_visitor(brw, prog_data, shader_prog);
+ v = new saturate_propagation_fs_visitor(compiler, prog_data, shader_prog);
_mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0);
- brw->gen = devinfo->gen = 4;
+ devinfo->gen = 4;
}
static fs_inst *
@@ -100,13 +103,13 @@ saturate_propagation(fs_visitor *v)
TEST_F(saturate_propagation_test, basic)
{
+ const fs_builder &bld = v->bld;
fs_reg dst0 = v->vgrf(glsl_type::float_type);
fs_reg dst1 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
- v->emit(BRW_OPCODE_ADD, dst0, src0, src1);
- v->emit(BRW_OPCODE_MOV, dst1, dst0)
- ->saturate = true;
+ bld.ADD(dst0, src0, src1);
+ set_saturate(true, bld.MOV(dst1, dst0));
/* = Before =
*
@@ -135,15 +138,15 @@ TEST_F(saturate_propagation_test, basic)
TEST_F(saturate_propagation_test, other_non_saturated_use)
{
+ const fs_builder &bld = v->bld;
fs_reg dst0 = v->vgrf(glsl_type::float_type);
fs_reg dst1 = v->vgrf(glsl_type::float_type);
fs_reg dst2 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
- v->emit(BRW_OPCODE_ADD, dst0, src0, src1);
- v->emit(BRW_OPCODE_MOV, dst1, dst0)
- ->saturate = true;
- v->emit(BRW_OPCODE_ADD, dst2, dst0, src0);
+ bld.ADD(dst0, src0, src1);
+ set_saturate(true, bld.MOV(dst1, dst0));
+ bld.ADD(dst2, dst0, src0);
/* = Before =
*
@@ -173,14 +176,14 @@ TEST_F(saturate_propagation_test, other_non_saturated_use)
TEST_F(saturate_propagation_test, predicated_instruction)
{
+ const fs_builder &bld = v->bld;
fs_reg dst0 = v->vgrf(glsl_type::float_type);
fs_reg dst1 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
- v->emit(BRW_OPCODE_ADD, dst0, src0, src1)
+ bld.ADD(dst0, src0, src1)
->predicate = BRW_PREDICATE_NORMAL;
- v->emit(BRW_OPCODE_MOV, dst1, dst0)
- ->saturate = true;
+ set_saturate(true, bld.MOV(dst1, dst0));
/* = Before =
*
@@ -208,14 +211,14 @@ TEST_F(saturate_propagation_test, predicated_instruction)
TEST_F(saturate_propagation_test, neg_mov_sat)
{
+ const fs_builder &bld = v->bld;
fs_reg dst0 = v->vgrf(glsl_type::float_type);
fs_reg dst1 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
- v->emit(BRW_OPCODE_ADD, dst0, src0, src1);
+ bld.ADD(dst0, src0, src1);
dst0.negate = true;
- v->emit(BRW_OPCODE_MOV, dst1, dst0)
- ->saturate = true;
+ set_saturate(true, bld.MOV(dst1, dst0));
/* = Before =
*
@@ -243,14 +246,14 @@ TEST_F(saturate_propagation_test, neg_mov_sat)
TEST_F(saturate_propagation_test, abs_mov_sat)
{
+ const fs_builder &bld = v->bld;
fs_reg dst0 = v->vgrf(glsl_type::float_type);
fs_reg dst1 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
- v->emit(BRW_OPCODE_ADD, dst0, src0, src1);
+ bld.ADD(dst0, src0, src1);
dst0.abs = true;
- v->emit(BRW_OPCODE_MOV, dst1, dst0)
- ->saturate = true;
+ set_saturate(true, bld.MOV(dst1, dst0));
/* = Before =
*
@@ -278,16 +281,15 @@ TEST_F(saturate_propagation_test, abs_mov_sat)
TEST_F(saturate_propagation_test, producer_saturates)
{
+ const fs_builder &bld = v->bld;
fs_reg dst0 = v->vgrf(glsl_type::float_type);
fs_reg dst1 = v->vgrf(glsl_type::float_type);
fs_reg dst2 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
- v->emit(BRW_OPCODE_ADD, dst0, src0, src1)
- ->saturate = true;
- v->emit(BRW_OPCODE_MOV, dst1, dst0)
- ->saturate = true;
- v->emit(BRW_OPCODE_MOV, dst2, dst0);
+ set_saturate(true, bld.ADD(dst0, src0, src1));
+ set_saturate(true, bld.MOV(dst1, dst0));
+ bld.MOV(dst2, dst0);
/* = Before =
*
@@ -318,16 +320,15 @@ TEST_F(saturate_propagation_test, producer_saturates)
TEST_F(saturate_propagation_test, intervening_saturating_copy)
{
+ const fs_builder &bld = v->bld;
fs_reg dst0 = v->vgrf(glsl_type::float_type);
fs_reg dst1 = v->vgrf(glsl_type::float_type);
fs_reg dst2 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
- v->emit(BRW_OPCODE_ADD, dst0, src0, src1);
- v->emit(BRW_OPCODE_MOV, dst1, dst0)
- ->saturate = true;
- v->emit(BRW_OPCODE_MOV, dst2, dst0)
- ->saturate = true;
+ bld.ADD(dst0, src0, src1);
+ set_saturate(true, bld.MOV(dst1, dst0));
+ set_saturate(true, bld.MOV(dst2, dst0));
/* = Before =
*
@@ -360,16 +361,16 @@ TEST_F(saturate_propagation_test, intervening_saturating_copy)
TEST_F(saturate_propagation_test, intervening_dest_write)
{
+ const fs_builder &bld = v->bld;
fs_reg dst0 = v->vgrf(glsl_type::vec4_type);
fs_reg dst1 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg src2 = v->vgrf(glsl_type::vec2_type);
- v->emit(BRW_OPCODE_ADD, offset(dst0, 2), src0, src1);
- v->emit(SHADER_OPCODE_TEX, dst0, src2)
+ bld.ADD(offset(dst0, 2), src0, src1);
+ bld.emit(SHADER_OPCODE_TEX, dst0, src2)
->regs_written = 4;
- v->emit(BRW_OPCODE_MOV, dst1, offset(dst0, 2))
- ->saturate = true;
+ set_saturate(true, bld.MOV(dst1, offset(dst0, 2)));
/* = Before =
*
@@ -400,18 +401,17 @@ TEST_F(saturate_propagation_test, intervening_dest_write)
TEST_F(saturate_propagation_test, mul_neg_mov_sat_mov_sat)
{
+ const fs_builder &bld = v->bld;
fs_reg dst0 = v->vgrf(glsl_type::float_type);
fs_reg dst1 = v->vgrf(glsl_type::float_type);
fs_reg dst2 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
- v->emit(BRW_OPCODE_MUL, dst0, src0, src1);
+ bld.MUL(dst0, src0, src1);
dst0.negate = true;
- v->emit(BRW_OPCODE_MOV, dst1, dst0)
- ->saturate = true;
+ set_saturate(true, bld.MOV(dst1, dst0));
dst0.negate = false;
- v->emit(BRW_OPCODE_MOV, dst2, dst0)
- ->saturate = true;
+ set_saturate(true, bld.MOV(dst2, dst0));
/* = Before =
*
diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
index 2ef52e9fd6b..84e43fa75cd 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
@@ -33,7 +33,7 @@ class copy_propagation_test : public ::testing::Test {
virtual void SetUp();
public:
- struct brw_context *brw;
+ struct brw_compiler *compiler;
struct brw_device_info *devinfo;
struct gl_context *ctx;
struct gl_shader_program *shader_prog;
@@ -44,12 +44,11 @@ public:
class copy_propagation_vec4_visitor : public vec4_visitor
{
public:
- copy_propagation_vec4_visitor(struct brw_context *brw,
+ copy_propagation_vec4_visitor(struct brw_compiler *compiler,
struct gl_shader_program *shader_prog)
- : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog,
+ : vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog,
MESA_SHADER_VERTEX, NULL,
- false /* no_spills */,
- ST_NONE, ST_NONE, ST_NONE)
+ false /* no_spills */, -1)
{
}
@@ -93,21 +92,20 @@ protected:
void copy_propagation_test::SetUp()
{
- brw = (struct brw_context *)calloc(1, sizeof(*brw));
- devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw));
- brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen));
- brw->intelScreen->devinfo = devinfo;
- ctx = &brw->ctx;
+ ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
+ compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
+ devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+ compiler->devinfo = devinfo;
vp = ralloc(NULL, struct brw_vertex_program);
shader_prog = ralloc(NULL, struct gl_shader_program);
- v = new copy_propagation_vec4_visitor(brw, shader_prog);
+ v = new copy_propagation_vec4_visitor(compiler, shader_prog);
_mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0);
- brw->gen = devinfo->gen = 4;
+ devinfo->gen = 4;
}
static void
diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
index c8c67574e95..de2afd39cfe 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -35,7 +35,7 @@ class register_coalesce_test : public ::testing::Test {
virtual void SetUp();
public:
- struct brw_context *brw;
+ struct brw_compiler *compiler;
struct brw_device_info *devinfo;
struct gl_context *ctx;
struct gl_shader_program *shader_prog;
@@ -47,12 +47,11 @@ public:
class register_coalesce_vec4_visitor : public vec4_visitor
{
public:
- register_coalesce_vec4_visitor(struct brw_context *brw,
+ register_coalesce_vec4_visitor(struct brw_compiler *compiler,
struct gl_shader_program *shader_prog)
- : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog,
+ : vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog,
MESA_SHADER_VERTEX, NULL,
- false /* no_spills */,
- ST_NONE, ST_NONE, ST_NONE)
+ false /* no_spills */, -1)
{
}
@@ -96,21 +95,20 @@ protected:
void register_coalesce_test::SetUp()
{
- brw = (struct brw_context *)calloc(1, sizeof(*brw));
- devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw));
- brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen));
- brw->intelScreen->devinfo = devinfo;
- ctx = &brw->ctx;
+ ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
+ compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
+ devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+ compiler->devinfo = devinfo;
vp = ralloc(NULL, struct brw_vertex_program);
shader_prog = ralloc(NULL, struct gl_shader_program);
- v = new register_coalesce_vec4_visitor(brw, shader_prog);
+ v = new register_coalesce_vec4_visitor(compiler, shader_prog);
_mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0);
- brw->gen = devinfo->gen = 4;
+ devinfo->gen = 4;
}
static void
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
index 6c479f5f0c6..c78d4baa124 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
@@ -242,7 +242,7 @@ static void
nouveau_framebuffer_renderbuffer(struct gl_context *ctx, struct gl_framebuffer *fb,
GLenum attachment, struct gl_renderbuffer *rb)
{
- _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb);
+ _mesa_FramebufferRenderbuffer_sw(ctx, fb, attachment, rb);
context_dirty(ctx, FRAMEBUFFER);
}
diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c
index c0c7b26bbf7..1398385b262 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c
@@ -31,6 +31,8 @@
#include "nv10_3d.xml.h"
#include "nv10_driver.h"
+#include "util/simple_list.h"
+
void
nv10_emit_clip_plane(struct gl_context *ctx, int emit)
{
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c b/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c
index f0acbed8560..41395516ea4 100644
--- a/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c
@@ -32,6 +32,8 @@
#include "nv10_driver.h"
#include "nv20_driver.h"
+#include "util/simple_list.h"
+
#define LIGHT_MODEL_AMBIENT_R(side) \
((side) ? NV20_3D_LIGHT_MODEL_BACK_AMBIENT_R : \
NV20_3D_LIGHT_MODEL_FRONT_AMBIENT_R)
diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c
index b0a6bd573b6..6fe70b5c9d0 100644
--- a/src/mesa/drivers/dri/r200/r200_state.c
+++ b/src/mesa/drivers/dri/r200/r200_state.c
@@ -2215,9 +2215,9 @@ GLboolean r200ValidateState( struct gl_context *ctx )
GLuint new_state = rmesa->radeon.NewGLState;
if (new_state & _NEW_BUFFERS) {
- _mesa_update_framebuffer(ctx);
+ _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer);
/* this updates the DrawBuffer's Width/Height if it's a FBO */
- _mesa_update_draw_buffer_bounds(ctx);
+ _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
R200_STATECHANGE(rmesa, ctx);
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
index 0ca526d2a02..2a8bd6c9edc 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -220,9 +220,9 @@ void radeon_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb)
*/
if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
/* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */
- _mesa_update_framebuffer(ctx);
+ _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer);
/* this updates the DrawBuffer's Width/Height if it's a FBO */
- _mesa_update_draw_buffer_bounds(ctx);
+ _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
}
if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c
index 97022f95953..ef62d097bae 100644
--- a/src/mesa/drivers/dri/radeon/radeon_fbo.c
+++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c
@@ -723,7 +723,7 @@ radeon_framebuffer_renderbuffer(struct gl_context * ctx,
"%s(%p, fb %p, rb %p) \n",
__func__, ctx, fb, rb);
- _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb);
+ _mesa_FramebufferRenderbuffer_sw(ctx, fb, attachment, rb);
radeon_draw_buffer(ctx, fb);
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
index c45bb513dca..cba3d9c9689 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@@ -1994,9 +1994,9 @@ GLboolean radeonValidateState( struct gl_context *ctx )
GLuint new_state = rmesa->radeon.NewGLState;
if (new_state & _NEW_BUFFERS) {
- _mesa_update_framebuffer(ctx);
+ _mesa_update_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer);
/* this updates the DrawBuffer's Width/Height if it's a FBO */
- _mesa_update_draw_buffer_bounds(ctx);
+ _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
RADEON_STATECHANGE(rmesa, ctx);
}
diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c
index 2ddb474dde7..2d4bb702fc2 100644
--- a/src/mesa/drivers/dri/swrast/swrast.c
+++ b/src/mesa/drivers/dri/swrast/swrast.c
@@ -62,7 +62,9 @@
#include "swrast/s_context.h"
#include <sys/types.h>
-#include <sys/sysctl.h>
+#ifdef HAVE_SYS_SYSCTL_H
+# include <sys/sysctl.h>
+#endif
const __DRIextension **__driDriverGetExtensions_swrast(void);
@@ -958,6 +960,7 @@ static const __DRIextension *swrast_driver_extensions[] = {
&driCoreExtension.base,
&driSWRastExtension.base,
&driCopySubBufferExtension.base,
+ &dri2ConfigQueryExtension.base,
&swrast_vtable.base,
NULL
};
diff --git a/src/mesa/drivers/haiku/swrast/SConscript b/src/mesa/drivers/haiku/swrast/SConscript
deleted file mode 100644
index 907325e3252..00000000000
--- a/src/mesa/drivers/haiku/swrast/SConscript
+++ /dev/null
@@ -1,33 +0,0 @@
-Import('*')
-
-env = env.Clone()
-
-env.Append(CPPPATH = [
- '#/src',
- '#/src/mapi',
- '#/src/mesa',
- '#/src/mesa/main',
- '#/include/HaikuGL',
- '/boot/system/develop/headers/private',
- Dir('../../../mapi'), # src/mapi build path for python-generated GL API files/headers
-])
-
-env.Prepend(LIBS = [
- mesautil,
- glsl,
- mesa,
-])
-
-env.Prepend(LIBS = [libgl])
-
-sources = [
- 'SoftwareRast.cpp'
-]
-
-# Disallow undefined symbols
-#env.Append(SHLINKFLAGS = ['-Wl,-z,defs'])
-
-libswrast = env.SharedLibrary(
- target = 'swrast',
- source = sources
-)
diff --git a/src/mesa/drivers/haiku/swrast/SoftwareRast.cpp b/src/mesa/drivers/haiku/swrast/SoftwareRast.cpp
deleted file mode 100644
index 813ad1ff27d..00000000000
--- a/src/mesa/drivers/haiku/swrast/SoftwareRast.cpp
+++ /dev/null
@@ -1,697 +0,0 @@
-/*
- * Copyright 2006-2012, Haiku, Inc. All rights reserved.
- * Distributed under the terms of the MIT License.
- *
- * Authors:
- * Jérôme Duval, [email protected]
- * Philippe Houdoin, [email protected]
- * Artur Wyszynski, [email protected]
- * Alexander von Gluck, [email protected]
- */
-
-
-#include <kernel/image.h>
-#include "SoftwareRast.h"
-
-#include <Autolock.h>
-#include <interface/DirectWindowPrivate.h>
-#include <GraphicsDefs.h>
-#include <Screen.h>
-#include <stdio.h>
-#include <string.h>
-
-extern "C" {
-#include "extensions.h"
-#include "drivers/common/driverfuncs.h"
-#include "drivers/common/meta.h"
-#include "main/api_exec.h"
-#include "main/colormac.h"
-#include "main/cpuinfo.h"
-#include "main/buffers.h"
-#include "main/formats.h"
-#include "main/framebuffer.h"
-#include "main/renderbuffer.h"
-#include "main/version.h"
-#include "main/vtxfmt.h"
-#include "swrast/swrast.h"
-#include "swrast/s_renderbuffer.h"
-#include "swrast_setup/swrast_setup.h"
-#include "tnl/tnl.h"
-#include "tnl/t_context.h"
-#include "tnl/t_pipeline.h"
-#include "vbo/vbo.h"
-
-
-#ifdef DEBUG
-# define TRACE(x...) printf("MesaSoftwareRast: " x)
-# define CALLED() printf("MesaSoftwareRast: %s\n", __PRETTY_FUNCTION__)
-#else
-# define TRACE(x...)
-# define CALLED()
-#endif
-
-#define ERROR(x...) printf("MesaSoftwareRast: " x)
-}
-
-
-extern const char* color_space_name(color_space space);
-
-
-extern "C" _EXPORT BGLRenderer*
-instantiate_gl_renderer(BGLView* view, ulong options,
- BGLDispatcher* dispatcher)
-{
- return new MesaSoftwareRast(view, options, dispatcher);
-}
-
-
-MesaSoftwareRast::MesaSoftwareRast(BGLView* view, ulong options,
- BGLDispatcher* dispatcher)
- : BGLRenderer(view, options, dispatcher),
- fBitmap(NULL),
- fDirectModeEnabled(false),
- fInfo(NULL),
- fInfoLocker("info locker"),
- fVisual(NULL),
- fFrameBuffer(NULL),
- fFrontRenderBuffer(NULL),
- fBackRenderBuffer(NULL),
- fColorSpace(B_NO_COLOR_SPACE)
-{
- CALLED();
-
- fColorSpace = BScreen(GLView()->Window()).ColorSpace();
-
- // We force single buffering for the time being
- options &= ~BGL_DOUBLE;
-
- const GLboolean rgbFlag = ((options & BGL_INDEX) == 0);
- const GLboolean alphaFlag = ((options & BGL_ALPHA) == BGL_ALPHA);
- const GLboolean dblFlag = ((options & BGL_DOUBLE) == BGL_DOUBLE);
- const GLboolean stereoFlag = false;
- const GLint depth = (options & BGL_DEPTH) ? 16 : 0;
- const GLint stencil = (options & BGL_STENCIL) ? 8 : 0;
- const GLint accum = (options & BGL_ACCUM) ? 16 : 0;
- const GLint red = rgbFlag ? 8 : 0;
- const GLint green = rgbFlag ? 8 : 0;
- const GLint blue = rgbFlag ? 8 : 0;
- const GLint alpha = alphaFlag ? 8 : 0;
-
- fOptions = options; // | BGL_INDIRECT;
- struct dd_function_table functions;
-
- fVisual = _mesa_create_visual(dblFlag, stereoFlag, red, green,
- blue, alpha, depth, stencil, accum, accum, accum,
- alpha ? accum : 0, 1);
-
- // Initialize device driver function table
- _mesa_init_driver_functions(&functions);
-
- functions.GetString = _GetString;
- functions.UpdateState = _UpdateState;
- functions.MapRenderbuffer = _RenderBufferMap;
- functions.Flush = _Flush;
-
- // create core context
- // We inherit gl_context to this class
- _mesa_initialize_context(this, API_OPENGL_COMPAT, fVisual, NULL,
- &functions);
-
- /* Initialize the software rasterizer and helper modules. */
- _swrast_CreateContext(this);
- _vbo_CreateContext(this);
- _tnl_CreateContext(this);
- _swsetup_CreateContext(this);
- _swsetup_Wakeup(this);
-
- // Use default TCL pipeline
- TNL_CONTEXT(this)->Driver.RunPipeline = _tnl_run_pipeline;
-
- _mesa_meta_init(this);
- _mesa_enable_sw_extensions(this);
-
- _mesa_compute_version(this);
-
- _mesa_initialize_dispatch_tables(this);
- _mesa_initialize_vbo_vtxfmt(this);
-
- // create core framebuffer
- fFrameBuffer = _mesa_create_framebuffer(fVisual);
- if (fFrameBuffer == NULL) {
- ERROR("%s: Unable to calloc GL FrameBuffer!\n", __func__);
- _mesa_destroy_visual(fVisual);
- return;
- }
-
- // Setup front render buffer
- fFrontRenderBuffer = _NewRenderBuffer(true);
- if (fFrontRenderBuffer == NULL) {
- ERROR("%s: FrontRenderBuffer is requested but unallocated!\n",
- __func__);
- _mesa_destroy_visual(fVisual);
- free(fFrameBuffer);
- return;
- }
- _mesa_add_renderbuffer(fFrameBuffer, BUFFER_FRONT_LEFT,
- &fFrontRenderBuffer->Base);
-
- // Setup back render buffer (if requested)
- if (fVisual->doubleBufferMode) {
- fBackRenderBuffer = _NewRenderBuffer(false);
- if (fBackRenderBuffer == NULL) {
- ERROR("%s: BackRenderBuffer is requested but unallocated!\n",
- __func__);
- _mesa_destroy_visual(fVisual);
- free(fFrameBuffer);
- return;
- }
- _mesa_add_renderbuffer(fFrameBuffer, BUFFER_BACK_LEFT,
- &fBackRenderBuffer->Base);
- }
-
- _swrast_add_soft_renderbuffers(fFrameBuffer, GL_FALSE,
- fVisual->haveDepthBuffer, fVisual->haveStencilBuffer,
- fVisual->haveAccumBuffer, alphaFlag, GL_FALSE);
-
- BRect bounds = view->Bounds();
- fWidth = (GLint)bounds.Width();
- fHeight = (GLint)bounds.Height();
-
- // some stupid applications (Quake2) don't even think about calling LockGL()
- // before using glGetString and its glGet*() friends...
- // so make sure there is at least a valid context.
-
- if (!_mesa_get_current_context()) {
- LockGL();
- // not needed, we don't have a looper yet: UnlockLooper();
- }
-}
-
-
-MesaSoftwareRast::~MesaSoftwareRast()
-{
- CALLED();
- _swsetup_DestroyContext(this);
- _swrast_DestroyContext(this);
- _tnl_DestroyContext(this);
- _vbo_DestroyContext(this);
- _mesa_destroy_visual(fVisual);
- _mesa_destroy_framebuffer(fFrameBuffer);
- _mesa_destroy_context(this);
-
- free(fInfo);
- free(fFrameBuffer);
-
- delete fBitmap;
-}
-
-
-void
-MesaSoftwareRast::LockGL()
-{
- CALLED();
- BGLRenderer::LockGL();
-
- _mesa_make_current(this, fFrameBuffer, fFrameBuffer);
-
- color_space colorSpace = BScreen(GLView()->Window()).ColorSpace();
-
- GLuint width = fWidth;
- GLuint height = fHeight;
-
- BAutolock lock(fInfoLocker);
- if (fDirectModeEnabled && fInfo != NULL) {
- width = fInfo->window_bounds.right
- - fInfo->window_bounds.left + 1;
- height = fInfo->window_bounds.bottom
- - fInfo->window_bounds.top + 1;
- }
-
- if (fColorSpace != colorSpace) {
- fColorSpace = colorSpace;
- _SetupRenderBuffer(&fFrontRenderBuffer->Base, fColorSpace);
- if (fVisual->doubleBufferMode)
- _SetupRenderBuffer(&fBackRenderBuffer->Base, fColorSpace);
- }
-
- _CheckResize(width, height);
-}
-
-
-void
-MesaSoftwareRast::UnlockGL()
-{
- CALLED();
- _mesa_make_current(this, NULL, NULL);
- BGLRenderer::UnlockGL();
-}
-
-
-void
-MesaSoftwareRast::SwapBuffers(bool VSync)
-{
- CALLED();
-
- if (!fBitmap)
- return;
-
- if (fVisual->doubleBufferMode)
- _mesa_notifySwapBuffers(this);
-
- if (!fDirectModeEnabled || fInfo == NULL) {
- if (GLView()->LockLooperWithTimeout(1000) == B_OK) {
- GLView()->DrawBitmap(fBitmap, B_ORIGIN);
- GLView()->UnlockLooper();
- }
- } else {
- // TODO: Here the BGLView needs to be drawlocked.
- _CopyToDirect();
- }
-
- if (VSync) {
- BScreen screen(GLView()->Window());
- screen.WaitForRetrace();
- }
-}
-
-
-void
-MesaSoftwareRast::Draw(BRect updateRect)
-{
- CALLED();
- if (fBitmap && (!fDirectModeEnabled || (fInfo == NULL)))
- GLView()->DrawBitmap(fBitmap, updateRect, updateRect);
-}
-
-
-status_t
-MesaSoftwareRast::CopyPixelsOut(BPoint location, BBitmap* bitmap)
-{
- CALLED();
- color_space scs = fBitmap->ColorSpace();
- color_space dcs = bitmap->ColorSpace();
-
- if (scs != dcs && (scs != B_RGBA32 || dcs != B_RGB32)) {
- fprintf(stderr, "CopyPixelsOut(): incompatible color space: %s != %s\n",
- color_space_name(scs),
- color_space_name(dcs));
- return B_BAD_TYPE;
- }
-
- BRect sr = fBitmap->Bounds();
- BRect dr = bitmap->Bounds();
-
- sr = sr & dr.OffsetBySelf(location);
- dr = sr.OffsetByCopy(-location.x, -location.y);
-
- uint8* ps = (uint8*)fBitmap->Bits();
- uint8* pd = (uint8*)bitmap->Bits();
- uint32* s;
- uint32* d;
- uint32 y;
- for (y = (uint32)sr.top; y <= (uint32)sr.bottom; y++) {
- s = (uint32*)(ps + y * fBitmap->BytesPerRow());
- s += (uint32)sr.left;
-
- d = (uint32*)(pd + (y + (uint32)(dr.top - sr.top))
- * bitmap->BytesPerRow());
- d += (uint32)dr.left;
-
- memcpy(d, s, dr.IntegerWidth() * 4);
- }
- return B_OK;
-}
-
-
-status_t
-MesaSoftwareRast::CopyPixelsIn(BBitmap* bitmap, BPoint location)
-{
- CALLED();
- color_space scs = bitmap->ColorSpace();
- color_space dcs = fBitmap->ColorSpace();
-
- if (scs != dcs && (dcs != B_RGBA32 || scs != B_RGB32)) {
- fprintf(stderr, "CopyPixelsIn(): incompatible color space: %s != %s\n",
- color_space_name(scs),
- color_space_name(dcs));
- return B_BAD_TYPE;
- }
-
- BRect sr = bitmap->Bounds();
- BRect dr = fBitmap->Bounds();
-
- sr = sr & dr.OffsetBySelf(location);
- dr = sr.OffsetByCopy(-location.x, -location.y);
-
- uint8* ps = (uint8*)bitmap->Bits();
- uint8* pd = (uint8*)fBitmap->Bits();
- uint32* s;
- uint32* d;
- uint32 y;
- for (y = (uint32)sr.top; y <= (uint32)sr.bottom; y++) {
- s = (uint32*)(ps + y * bitmap->BytesPerRow());
- s += (uint32)sr.left;
-
- d = (uint32*)(pd + (y + (uint32)(dr.top - sr.top))
- * fBitmap->BytesPerRow());
- d += (uint32)dr.left;
-
- memcpy(d, s, dr.IntegerWidth() * 4);
- }
- return B_OK;
-}
-
-
-void
-MesaSoftwareRast::EnableDirectMode(bool enabled)
-{
- fDirectModeEnabled = enabled;
-}
-
-
-void
-MesaSoftwareRast::DirectConnected(direct_buffer_info* info)
-{
- // TODO: I'm not sure we need to do this: BGLView already
- // keeps a local copy of the direct_buffer_info passed by
- // BDirectWindow::DirectConnected().
- BAutolock lock(fInfoLocker);
- if (info) {
- if (!fInfo) {
- fInfo = (direct_buffer_info*)malloc(DIRECT_BUFFER_INFO_AREA_SIZE);
- if (!fInfo)
- return;
- }
- memcpy(fInfo, info, DIRECT_BUFFER_INFO_AREA_SIZE);
- } else if (fInfo) {
- free(fInfo);
- fInfo = NULL;
- }
-}
-
-
-void
-MesaSoftwareRast::FrameResized(float width, float height)
-{
- BAutolock lock(fInfoLocker);
- _CheckResize((GLuint)width, (GLuint)height);
-}
-
-
-void
-MesaSoftwareRast::_CheckResize(GLuint newWidth, GLuint newHeight)
-{
- CALLED();
-
- if (fBitmap && newWidth == fWidth
- && newHeight == fHeight) {
- return;
- }
-
- _mesa_resize_framebuffer(this, fFrameBuffer, newWidth, newHeight);
- fHeight = newHeight;
- fWidth = newWidth;
-
- _AllocateBitmap();
-}
-
-
-void
-MesaSoftwareRast::_AllocateBitmap()
-{
- CALLED();
-
- // allocate new size of back buffer bitmap
- delete fBitmap;
- fBitmap = NULL;
-
- if (fWidth < 1 || fHeight < 1) {
- TRACE("%s: Cannot allocate bitmap < 1x1!\n", __func__);
- return;
- }
-
- BRect rect(0.0, 0.0, fWidth - 1, fHeight - 1);
- fBitmap = new BBitmap(rect, fColorSpace);
-
- #if 0
- // Used for platform optimized drawing
- for (uint i = 0; i < fHeight; i++) {
- fRowAddr[fHeight - i - 1] = (GLvoid *)((GLubyte *)fBitmap->Bits()
- + i * fBitmap->BytesPerRow());
- }
- #endif
-
- fFrameBuffer->Width = fWidth;
- fFrameBuffer->Height = fHeight;
- TRACE("%s: Bitmap Size: %" B_PRIu32 "\n", __func__, fBitmap->BitsLength());
-
- fFrontRenderBuffer->Buffer = (GLubyte*)fBitmap->Bits();
-}
-
-
-// #pragma mark - static
-
-
-const GLubyte*
-MesaSoftwareRast::_GetString(gl_context* ctx, GLenum name)
-{
- switch (name) {
- case GL_VENDOR:
- return (const GLubyte*) "Mesa Project";
- case GL_RENDERER:
- return (const GLubyte*) "Software Rasterizer";
- default:
- // Let core library handle all other cases
- return NULL;
- }
-}
-
-
-void
-MesaSoftwareRast::_UpdateState(gl_context* ctx, GLuint new_state)
-{
- if (!ctx)
- return;
-
- CALLED();
- _swrast_InvalidateState(ctx, new_state);
- _swsetup_InvalidateState(ctx, new_state);
- _vbo_InvalidateState(ctx, new_state);
- _tnl_InvalidateState(ctx, new_state);
-}
-
-
-GLboolean
-MesaSoftwareRast::_RenderBufferStorage(gl_context* ctx,
- struct gl_renderbuffer* render, GLenum internalFormat,
- GLuint width, GLuint height)
-{
- CALLED();
-
- render->Width = width;
- render->Height = height;
-
- struct swrast_renderbuffer *swRenderBuffer = swrast_renderbuffer(render);
-
- swRenderBuffer->RowStride = width * _mesa_get_format_bytes(render->Format);
-
- return GL_TRUE;
-}
-
-
-GLboolean
-MesaSoftwareRast::_RenderBufferStorageMalloc(gl_context* ctx,
- struct gl_renderbuffer* render, GLenum internalFormat,
- GLuint width, GLuint height)
-{
- CALLED();
-
- render->Width = width;
- render->Height = height;
-
- struct swrast_renderbuffer *swRenderBuffer = swrast_renderbuffer(render);
-
- if (swRenderBuffer != NULL) {
- free(swRenderBuffer->Buffer);
- swRenderBuffer->RowStride
- = width * _mesa_get_format_bytes(render->Format);
-
- uint32 size = swRenderBuffer->RowStride * height;
- TRACE("%s: Allocate %" B_PRIu32 " bytes for RenderBuffer\n",
- __func__, size);
- swRenderBuffer->Buffer = (GLubyte*)malloc(size);
- if (!swRenderBuffer->Buffer) {
- ERROR("%s: Memory allocation failure!\n", __func__);
- return GL_FALSE;
- }
- } else {
- ERROR("%s: Couldn't obtain software renderbuffer!\n",
- __func__);
- return GL_FALSE;
- }
-
- return GL_TRUE;
-}
-
-
-void
-MesaSoftwareRast::_Flush(gl_context* ctx)
-{
- CALLED();
- MesaSoftwareRast* driverContext = static_cast<MesaSoftwareRast*>(ctx);
-
- //MesaSoftwareRast* driverContext = (MesaSoftwareRast*)ctx->DriverCtx;
- if ((driverContext->fOptions & BGL_DOUBLE) == 0) {
- // TODO: SwapBuffers() can call _CopyToDirect(), which should
- // be always called with with the BGLView drawlocked.
- // This is not always the case if called from here.
- driverContext->SwapBuffers();
- }
-}
-
-
-struct swrast_renderbuffer*
-MesaSoftwareRast::_NewRenderBuffer(bool front)
-{
- CALLED();
- struct swrast_renderbuffer *swRenderBuffer
- = (struct swrast_renderbuffer*)calloc(1, sizeof *swRenderBuffer);
-
- if (!swRenderBuffer) {
- ERROR("%s: Failed calloc RenderBuffer\n", __func__);
- return NULL;
- }
-
- _mesa_init_renderbuffer(&swRenderBuffer->Base, 0);
-
- swRenderBuffer->Base.ClassID = HAIKU_SWRAST_RENDERBUFFER_CLASS;
- swRenderBuffer->Base.RefCount = 1;
- swRenderBuffer->Base.Delete = _RenderBufferDelete;
-
- if (!front)
- swRenderBuffer->Base.AllocStorage = _RenderBufferStorageMalloc;
- else
- swRenderBuffer->Base.AllocStorage = _RenderBufferStorage;
-
- if (_SetupRenderBuffer(&swRenderBuffer->Base, fColorSpace) != B_OK) {
- free(swRenderBuffer);
- return NULL;
- }
-
- return swRenderBuffer;
-}
-
-
-status_t
-MesaSoftwareRast::_SetupRenderBuffer(struct gl_renderbuffer* rb,
- color_space colorSpace)
-{
- CALLED();
-
- rb->InternalFormat = GL_RGBA;
-
- switch (colorSpace) {
- case B_RGBA32:
- rb->_BaseFormat = GL_RGBA;
- rb->Format = MESA_FORMAT_B8G8R8A8_UNORM;
- break;
- case B_RGB32:
- rb->_BaseFormat = GL_RGB;
- rb->Format = MESA_FORMAT_B8G8R8X8_UNORM;
- break;
- case B_RGB24:
- rb->_BaseFormat = GL_RGB;
- rb->Format = MESA_FORMAT_BGR_UNORM8;
- break;
- case B_RGB16:
- rb->_BaseFormat = GL_RGB;
- rb->Format = MESA_FORMAT_B5G6R5_UNORM;
- break;
- case B_RGB15:
- rb->_BaseFormat = GL_RGB;
- rb->Format = MESA_FORMAT_B5G5R5A1_UNORM;
- break;
- default:
- fprintf(stderr, "Unsupported screen color space %s\n",
- color_space_name(fColorSpace));
- debugger("Unsupported OpenGL color space");
- return B_ERROR;
- }
- return B_OK;
-}
-
-
-/*! Y inverted Map RenderBuffer function
- We use a BBitmap for storage which has Y inverted.
- If the Mesa provided Map function ever allows external
- control of this we can omit this function.
-*/
-void
-MesaSoftwareRast::_RenderBufferMap(gl_context *ctx,
- struct gl_renderbuffer *rb, GLuint x, GLuint y, GLuint w, GLuint h,
- GLbitfield mode, GLubyte **mapOut, GLint *rowStrideOut)
-{
- if (rb->ClassID == HAIKU_SWRAST_RENDERBUFFER_CLASS) {
- struct swrast_renderbuffer *srb = swrast_renderbuffer(rb);
- const GLuint bpp = _mesa_get_format_bytes(rb->Format);
- GLint rowStride = rb->Width * bpp; // in Bytes
-
- y = rb->Height - y - 1;
-
- *rowStrideOut = -rowStride;
- *mapOut = (GLubyte *) srb->Buffer + y * rowStride + x * bpp;
- } else {
- _swrast_map_soft_renderbuffer(ctx, rb, x, y, w, h, mode,
- mapOut, rowStrideOut);
- }
-}
-
-
-void
-MesaSoftwareRast::_RenderBufferDelete(struct gl_context *ctx,
- struct gl_renderbuffer* rb)
-{
- CALLED();
- if (rb != NULL) {
- struct swrast_renderbuffer *swRenderBuffer
- = swrast_renderbuffer(rb);
- if (swRenderBuffer != NULL)
- free(swRenderBuffer->Buffer);
- }
- free(rb);
-}
-
-
-void
-MesaSoftwareRast::_CopyToDirect()
-{
- BAutolock lock(fInfoLocker);
-
- // check the bitmap size still matches the size
- if (fInfo->window_bounds.bottom - fInfo->window_bounds.top
- != fBitmap->Bounds().IntegerHeight()
- || fInfo->window_bounds.right - fInfo->window_bounds.left
- != fBitmap->Bounds().IntegerWidth())
- return;
-
- uint8 bytesPerPixel = fInfo->bits_per_pixel / 8;
- uint32 bytesPerRow = fBitmap->BytesPerRow();
- for (uint32 i = 0; i < fInfo->clip_list_count; i++) {
- clipping_rect *clip = &fInfo->clip_list[i];
- int32 height = clip->bottom - clip->top + 1;
- int32 bytesWidth
- = (clip->right - clip->left + 1) * bytesPerPixel;
- uint8* p = (uint8*)fInfo->bits + clip->top
- * fInfo->bytes_per_row + clip->left * bytesPerPixel;
- uint8* b = (uint8*)fBitmap->Bits()
- + (clip->top - fInfo->window_bounds.top) * bytesPerRow
- + (clip->left - fInfo->window_bounds.left)
- * bytesPerPixel;
-
- for (int y = 0; y < height; y++) {
- memcpy(p, b, bytesWidth);
- p += fInfo->bytes_per_row;
- b += bytesPerRow;
- }
- }
-}
diff --git a/src/mesa/drivers/haiku/swrast/SoftwareRast.h b/src/mesa/drivers/haiku/swrast/SoftwareRast.h
deleted file mode 100644
index 8f0f0184863..00000000000
--- a/src/mesa/drivers/haiku/swrast/SoftwareRast.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright 2006-2012, Haiku, Inc. All rights reserved.
- * Distributed under the terms of the MIT License.
- *
- * Authors:
- * Jérôme Duval, [email protected]
- * Philippe Houdoin, [email protected]
- * Artur Wyszynski, [email protected]
- */
-#ifndef MESASOFTWARERENDERER_H
-#define MESASOFTWARERENDERER_H
-
-
-#define HAIKU_SWRAST_RENDERBUFFER_CLASS 0x737752 // swR
-
-
-#include "GLRenderer.h"
-
-extern "C" {
-#include "context.h"
-#include "main/version.h"
-#include "swrast/s_chan.h"
-#include "swrast/s_context.h"
-}
-
-
-class MesaSoftwareRast : public BGLRenderer, public gl_context {
-public:
- MesaSoftwareRast(BGLView* view,
- ulong bgl_options,
- BGLDispatcher* dispatcher);
- virtual ~MesaSoftwareRast();
-
- virtual void LockGL();
- virtual void UnlockGL();
-
- virtual void SwapBuffers(bool VSync = false);
- virtual void Draw(BRect updateRect);
- virtual status_t CopyPixelsOut(BPoint source, BBitmap* dest);
- virtual status_t CopyPixelsIn(BBitmap* source, BPoint dest);
- virtual void FrameResized(float width, float height);
-
- virtual void EnableDirectMode(bool enabled);
- virtual void DirectConnected(direct_buffer_info* info);
-
-private:
- static const GLubyte* _GetString(gl_context* ctx, GLenum name);
- void _CheckResize(GLuint newWidth, GLuint newHeight);
- static void _UpdateState(gl_context* ctx, GLuint newState);
- static void _Flush(gl_context *ctx);
-
- struct swrast_renderbuffer* _NewRenderBuffer(bool front);
- status_t _SetupRenderBuffer(struct gl_renderbuffer* rb,
- color_space colorSpace);
-
-/* Mesa callbacks */
- static void _RenderBufferDelete(struct gl_context *ctx,
- struct gl_renderbuffer* rb);
- static GLboolean _RenderBufferStorage(gl_context* ctx,
- struct gl_renderbuffer* render,
- GLenum internalFormat,
- GLuint width, GLuint height);
- static GLboolean _RenderBufferStorageMalloc(gl_context* ctx,
- struct gl_renderbuffer* render,
- GLenum internalFormat,
- GLuint width, GLuint height);
- static void _RenderBufferMap(gl_context *ctx,
- struct gl_renderbuffer *rb,
- GLuint x, GLuint y, GLuint w, GLuint h,
- GLbitfield mode, GLubyte **mapOut,
- GLint *rowStrideOut);
-
- void _AllocateBitmap();
- void _CopyToDirect();
-
- BBitmap* fBitmap;
- bool fDirectModeEnabled;
- direct_buffer_info* fInfo;
- BLocker fInfoLocker;
- ulong fOptions;
-
- gl_config* fVisual;
-
- struct gl_framebuffer* fFrameBuffer;
- struct swrast_renderbuffer* fFrontRenderBuffer;
- struct swrast_renderbuffer* fBackRenderBuffer;
-
- GLuint fWidth;
- GLuint fHeight;
- color_space fColorSpace;
-
- void* fRowAddr[SWRAST_MAX_HEIGHT];
-};
-
-#endif // MESASOFTWARERENDERER_H
diff --git a/src/mesa/drivers/haiku/swrast/SoftwareRast.rdef b/src/mesa/drivers/haiku/swrast/SoftwareRast.rdef
deleted file mode 100644
index cb60332100c..00000000000
--- a/src/mesa/drivers/haiku/swrast/SoftwareRast.rdef
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright 2012, Haiku, Inc. All rights reserved.
- * Distributed under the terms of the MIT License.
- */
-
-resource app_signature "application/x-vnd.Haiku-swrast";
-
-resource app_version {
- major = 9,
- middle = 0,
- minor = 0,
- variety = 0,
- internal = 0,
- short_info = "Software Rasterizer",
- long_info = "Haiku Mesa Software GL Rasterizer"
-};
-
-resource vector_icon {
- $"6E6369660A0200140294A9FF18020014028DFFFF97058C0500020006023B10B7"
- $"37F036BA1A993D466848C719BEBE2000919292FFD5D5D5020016023900000000"
- $"000000003EE0004AE00048E0005EF884C702000203392E8D383001BAD97F3C12"
- $"8B4786BD48B8AD0D97BBFFFF7B4168DBE9FF4168DB97020002023A0C1238D099"
- $"BE44203F4BD14B38844678240DF56A7D9FE1EA064CC704016B0500090A044024"
- $"2438404C5C380A044028243C40505C3C0A042438243B5C3C5C380608BFBE4D59"
- $"4D59515957575659585560406044603C5E3A5C3CCB4FBFBA5E3ECA9DC11F564B"
- $"584A544C504C0606AF0F2F3D2F3D393D4034BF593542324130432F42364432C0"
- $"3FBC5A2F48354A2F480608AE9A22303EB5BD3AB42542B755422E412F3C29322D"
- $"32223C0204263726372538263F253E263F304430443143303C313D303C02043D"
- $"423D423C433D4A3C493D4A495049504A4F49474A484947060DAEAAAE014E445A"
- $"3456365E325E3D5D3F5A3A5542544E4D573A4E364439463342324A2242310A0A"
- $"0002020102403CA00C88888C8CC1401673C40D6544F2950A01010002403CA000"
- $"0000000000401673C40D65446CF80A08020304023EC16A0000000000003EC16A"
- $"45DD1844C6550A030105123EC16A0000000000003EC16A45DD1844C655011784"
- $"22040A040105023EC16A0000000000003EC16A45DD1844C6550A030108123EC1"
- $"6A0000000000003EC16A45DD1844C65501178422040A0503080706023EC16A00"
- $"00000000003EC16A45DD1844C6550A030206071A3EC16A0000000000003EC16A"
- $"45DD1844C65510FF0215810004178222040A060106023EC16A0000000000003E"
- $"C16A45DD1844C6550A070107023EC16A0000000000003EC16A45DD1844C655"
-};
diff --git a/src/mesa/drivers/osmesa/Makefile.am b/src/mesa/drivers/osmesa/Makefile.am
index 9a388d64cd5..46332e16bd1 100644
--- a/src/mesa/drivers/osmesa/Makefile.am
+++ b/src/mesa/drivers/osmesa/Makefile.am
@@ -39,7 +39,6 @@ nodist_EXTRA_lib@OSMESA_LIB@_la_SOURCES = dummy.cpp
lib@OSMESA_LIB@_la_SOURCES = osmesa.c
lib@OSMESA_LIB@_la_LDFLAGS = \
- -module \
-no-undefined \
-version-number @OSMESA_VERSION@ \
$(GC_SECTIONS) \
diff --git a/src/mesa/drivers/x11/Makefile.am b/src/mesa/drivers/x11/Makefile.am
index c0596f8119e..ba79f6981b9 100644
--- a/src/mesa/drivers/x11/Makefile.am
+++ b/src/mesa/drivers/x11/Makefile.am
@@ -25,6 +25,11 @@
EXTRA_DIST = SConscript
+if HAVE_SHARED_GLAPI
+SHARED_GLAPI_CFLAGS = -DGLX_SHARED_GLAPI
+SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la
+endif
+
AM_CPPFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/mapi \
@@ -34,11 +39,10 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/mesa/main \
$(X11_INCLUDES) \
+ $(SHARED_GLAPI_CFLAGS) \
$(DEFINES)
-if HAVE_X11_DRIVER
lib_LTLIBRARIES = lib@GL_LIB@.la
-endif
lib@GL_LIB@_la_SOURCES = \
glxapi.h \
@@ -66,6 +70,7 @@ GL_PATCH = 0
lib@GL_LIB@_la_LIBADD = \
$(top_builddir)/src/mesa/libmesa.la \
$(top_builddir)/src/mapi/glapi/libglapi.la \
+ $(SHARED_GLAPI_LIB) \
$(GL_LIB_DEPS)
lib@GL_LIB@_la_LDFLAGS = \