summary refs log tree commit diff stats
path: root/src/mesa
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa')
-rw-r--r-- src/mesa/Makefile.sources 1
-rw-r--r-- src/mesa/drivers/common/meta.c 11
-rw-r--r-- src/mesa/drivers/common/meta.h 3
-rw-r--r-- src/mesa/drivers/common/meta_blit.c 84
-rw-r--r-- src/mesa/drivers/common/meta_generate_mipmap.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/Makefile.sources 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_binding_tables.c 21
-rw-r--r-- src/mesa/drivers/dri/i965/brw_compiler.h 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.c 6
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h 8
-rw-r--r-- src/mesa/drivers/dri/i965/brw_defines.h 73
-rw-r--r-- src/mesa/drivers/dri/i965/brw_device_info.c 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_disasm.c 37
-rw-r--r-- src/mesa/drivers/dri/i965/brw_draw.c 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu.h 10
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_compact.c 7
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_emit.c 58
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_validate.c 407
-rw-r--r-- src/mesa/drivers/dri/i965/brw_ff_gs_emit.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 784
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h 33
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_builder.h 17
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp 19
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 59
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_cse.cpp 25
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp 8
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_generator.cpp 115
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp 6
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_live_variables.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_nir.cpp 1099
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp 65
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp 30
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp 8
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_validate.cpp 8
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 82
-rw-r--r-- src/mesa/drivers/dri/i965/brw_ir_fs.h 27
-rw-r--r-- src/mesa/drivers/dri/i965/brw_ir_vec4.h 27
-rw-r--r-- src/mesa/drivers/dri/i965/brw_link.cpp 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_meta_fast_clear.c 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c 22
-rw-r--r-- src/mesa/drivers/dri/i965/brw_multisample_state.h 26
-rw-r--r-- src/mesa/drivers/dri/i965/brw_nir.c 49
-rw-r--r-- src/mesa/drivers/dri/i965/brw_nir.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c 299
-rw-r--r-- src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_reg.h 96
-rw-r--r-- src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp 176
-rw-r--r-- src/mesa/drivers/dri/i965/brw_shader.cpp 53
-rw-r--r-- src/mesa/drivers/dri/i965/brw_shader.h 37
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state.h 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state_upload.c 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.cpp 367
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_builder.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp 31
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_cse.cpp 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp 10
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 73
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 31
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp 8
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_live_variables.h 12
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 49
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp 54
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 102
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs.c 6
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vue_map.c 27
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm.c 8
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_multisample_state.c 17
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_sol.c 6
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/gen8_multisample_state.c 12
-rw-r--r-- src/mesa/drivers/dri/i965/intel_asm_annotation.c 95
-rw-r--r-- src/mesa/drivers/dri/i965/intel_asm_annotation.h 7
-rw-r--r-- src/mesa/drivers/dri/i965/intel_extensions.c 1
-rw-r--r-- src/mesa/drivers/dri/i965/intel_mipmap_tree.c 14
-rw-r--r-- src/mesa/drivers/dri/i965/intel_screen.c 5
-rw-r--r-- src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp 4
-rw-r--r-- src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp 4
-rw-r--r-- src/mesa/main/arrayobj.c 5
-rw-r--r-- src/mesa/main/blend.c 2
-rw-r--r-- src/mesa/main/context.h 1
-rw-r--r-- src/mesa/main/copyimage.c 37
-rw-r--r-- src/mesa/main/extensions.c 473
-rw-r--r-- src/mesa/main/extensions.h 44
-rw-r--r-- src/mesa/main/extensions_table.h 335
-rw-r--r-- src/mesa/main/mtypes.h 36
-rw-r--r-- src/mesa/main/pipelineobj.c 15
-rw-r--r-- src/mesa/main/shader_query.cpp 64
-rw-r--r-- src/mesa/main/shaderobj.c 4
-rw-r--r-- src/mesa/main/shaderobj.h 3
-rw-r--r-- src/mesa/main/tests/dispatch_sanity.cpp 3
-rw-r--r-- src/mesa/main/texstate.c 3
-rw-r--r-- src/mesa/main/uniforms.c 4
-rw-r--r-- src/mesa/main/version.c 1
-rw-r--r-- src/mesa/state_tracker/st_cb_bufferobjects.c 1
-rw-r--r-- src/mesa/state_tracker/st_cb_copyimage.c 8
-rw-r--r-- src/mesa/state_tracker/st_cb_texture.c 29
-rw-r--r-- src/mesa/state_tracker/st_extensions.c 2
-rw-r--r-- src/mesa/state_tracker/st_glsl_to_tgsi.cpp 4
-rw-r--r-- src/mesa/state_tracker/st_manager.c 57
-rw-r--r-- src/mesa/vbo/vbo_exec_api.c 11
-rw-r--r-- src/mesa/vbo/vbo_exec_draw.c 1
103 files changed, 4097 insertions, 1957 deletions
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index de0e330b7d1..778b92d9892 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -77,6 +77,7 @@ MAIN_FILES = \
main/execmem.c \
main/extensions.c \
main/extensions.h \
+ main/extensions_table.h \
main/fbobject.c \
main/fbobject.h \
main/feedback.c \
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index e27489d6195..0ffcd9c2c3f 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -449,6 +449,16 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
save->API = ctx->API;
ctx->API = API_OPENGL_COMPAT;
+ /* Mesa's extension helper functions use the current context's API to look up
+ * the version required by an extension as a step in determining whether or
+ * not it has been advertised. Since meta aims to only be restricted by the
+ * driver capability (and not by whether or not an extension has been
+ * advertised), set the helper functions' Version variable to a value that
+ * will make the checks on the context API and version unconditionally pass.
+ */
+ save->ExtensionsVersion = ctx->Extensions.Version;
+ ctx->Extensions.Version = ~0;
+
/* Pausing transform feedback needs to be done early, or else we won't be
* able to change other state.
*/
@@ -1222,6 +1232,7 @@ _mesa_meta_end(struct gl_context *ctx)
ctx->Meta->SaveStackDepth--;
ctx->API = save->API;
+ ctx->Extensions.Version = save->ExtensionsVersion;
}
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index 23fa209905d..d4bf0b65524 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -72,6 +72,7 @@ struct save_state
/* Always saved/restored with meta. */
gl_api API;
+ uint8_t ExtensionsVersion;
/** MESA_META_CLEAR (and others?) */
struct gl_query_object *CurrentOcclusionObject;
@@ -285,9 +286,11 @@ enum blit_msaa_shader {
BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE,
BLIT_4X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE,
BLIT_8X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE,
+ BLIT_16X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE,
BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE,
BLIT_4X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE,
BLIT_8X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE,
+ BLIT_16X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE,
BLIT_MSAA_SHADER_COUNT,
};
diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c
index 5972a5af0c9..4a2444af0f9 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -72,20 +72,25 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
char *sample_map_expr = rzalloc_size(mem_ctx, 1);
char *texel_fetch_macro = rzalloc_size(mem_ctx, 1);
const char *sampler_array_suffix = "";
- float y_scale;
+ float x_scale, y_scale;
enum blit_msaa_shader shader_index;
assert(src_rb);
samples = MAX2(src_rb->NumSamples, 1);
- y_scale = samples * 0.5;
+
+ if (samples == 16)
+ x_scale = 4.0;
+ else
+ x_scale = 2.0;
+ y_scale = samples / x_scale;
/* We expect only power of 2 samples in source multisample buffer. */
assert(samples > 0 && _mesa_is_pow_two(samples));
while (samples >> (shader_offset + 1)) {
shader_offset++;
}
- /* Update the assert if we plan to support more than 8X MSAA. */
- assert(shader_offset > 0 && shader_offset < 4);
+ /* Update the assert if we plan to support more than 16X MSAA. */
+ assert(shader_offset > 0 && shader_offset <= 4);
assert(target == GL_TEXTURE_2D_MULTISAMPLE ||
target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY);
@@ -129,6 +134,10 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
sample_number = "sample_map[int(2 * fract(coord.x) + 8 * fract(coord.y))]";
sample_map = ctx->Const.SampleMap8x;
break;
+ case 16:
+ sample_number = "sample_map[int(4 * fract(coord.x) + 16 * fract(coord.y))]";
+ sample_map = ctx->Const.SampleMap16x;
+ break;
default:
sample_number = NULL;
sample_map = NULL;
@@ -184,9 +193,9 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
"{\n"
"%s"
" vec2 interp;\n"
- " const vec2 scale = vec2(2.0f, %ff);\n"
- " const vec2 scale_inv = vec2(0.5f, %ff);\n"
- " const vec2 s_0_offset = vec2(0.25f, %ff);\n"
+ " const vec2 scale = vec2(%ff, %ff);\n"
+ " const vec2 scale_inv = vec2(%ff, %ff);\n"
+ " const vec2 s_0_offset = vec2(%ff, %ff);\n"
" vec2 s_0_coord, s_1_coord, s_2_coord, s_3_coord;\n"
" vec4 s_0_color, s_1_color, s_2_color, s_3_color;\n"
" vec4 x_0_color, x_1_color;\n"
@@ -219,9 +228,9 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
"}\n",
sampler_array_suffix,
sample_map_expr,
- y_scale,
- 1.0f / y_scale,
- 1.0f / samples,
+ x_scale, y_scale,
+ 1.0f / x_scale, 1.0f / y_scale,
+ 0.5f / x_scale, 0.5f / y_scale,
texel_fetch_macro);
_mesa_meta_compile_and_link_program(ctx, vs_source, fs_source, name,
@@ -348,17 +357,17 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
shader_index == BLIT_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_DEPTH_COPY ||
shader_index == BLIT_MSAA_SHADER_2D_MULTISAMPLE_DEPTH_COPY) {
char *sample_index;
- const char *arb_sample_shading_extension_string;
+ const char *tex_coords = "texCoords";
if (dst_is_msaa) {
- arb_sample_shading_extension_string = "#extension GL_ARB_sample_shading : enable";
sample_index = "gl_SampleID";
name = "depth MSAA copy";
+
+ if (ctx->Extensions.ARB_gpu_shader5 && samples >= 16) {
+ /* See comment below for the color copy */
+ tex_coords = "interpolateAtOffset(texCoords, vec2(0.0))";
+ }
} else {
- /* Don't need that extension, since we're drawing to a single-sampled
- * destination.
- */
- arb_sample_shading_extension_string = "";
/* From the GL 4.3 spec:
*
* "If there is a multisample buffer (the value of SAMPLE_BUFFERS
@@ -388,34 +397,59 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
fs_source = ralloc_asprintf(mem_ctx,
"#version 130\n"
"#extension GL_ARB_texture_multisample : enable\n"
- "%s\n"
+ "#extension GL_ARB_sample_shading : enable\n"
+ "#extension GL_ARB_gpu_shader5 : enable\n"
"uniform sampler2DMS%s texSampler;\n"
"in %s texCoords;\n"
"out vec4 out_color;\n"
"\n"
"void main()\n"
"{\n"
- " gl_FragDepth = texelFetch(texSampler, i%s(texCoords), %s).r;\n"
+ " gl_FragDepth = texelFetch(texSampler, i%s(%s), %s).r;\n"
"}\n",
- arb_sample_shading_extension_string,
sampler_array_suffix,
texcoord_type,
texcoord_type,
+ tex_coords,
sample_index);
} else {
/* You can create 2D_MULTISAMPLE textures with 0 sample count (meaning 1
* sample). Yes, this is ridiculous.
*/
char *sample_resolve;
- const char *arb_sample_shading_extension_string;
const char *merge_function;
name = ralloc_asprintf(mem_ctx, "%svec4 MSAA %s",
vec4_prefix,
dst_is_msaa ? "copy" : "resolve");
if (dst_is_msaa) {
- arb_sample_shading_extension_string = "#extension GL_ARB_sample_shading : enable";
- sample_resolve = ralloc_asprintf(mem_ctx, " out_color = texelFetch(texSampler, i%s(texCoords), gl_SampleID);", texcoord_type);
+ const char *tex_coords;
+
+ if (ctx->Extensions.ARB_gpu_shader5 && samples >= 16) {
+ /* If interpolateAtOffset is available then it will be used to
+ * force the interpolation to the center. This is required at
+ * least on Intel hardware because it is possible to have a sample
+ * position on the 0 x or y axis which means it will lie exactly
+ * on the pixel boundary. If we let the hardware interpolate the
+ * coordinates at one of these positions then it is possible for
+ * it to jump to a neighboring texel when converting to ints due
+ * to rounding errors. This is only done for >= 16x MSAA because
+ * it probably has some overhead. It is more likely that some
+ * hardware will use one of these problematic positions at 16x
+ * MSAA because in that case in D3D they are defined to be at
+ * these positions.
+ */
+ tex_coords = "interpolateAtOffset(texCoords, vec2(0.0))";
+ } else {
+ tex_coords = "texCoords";
+ }
+
+ sample_resolve =
+ ralloc_asprintf(mem_ctx,
+ " out_color = texelFetch(texSampler, "
+ "i%s(%s), gl_SampleID);",
+ texcoord_type, tex_coords);
+
merge_function = "";
} else {
int i;
@@ -430,8 +464,6 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
"vec4 merge(vec4 a, vec4 b) { return (a + b); }\n";
}
- arb_sample_shading_extension_string = "";
-
/* We're assuming power of two samples for this resolution procedure.
*
* To avoid losing any floating point precision if the samples all
@@ -487,7 +519,8 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
fs_source = ralloc_asprintf(mem_ctx,
"#version 130\n"
"#extension GL_ARB_texture_multisample : enable\n"
- "%s\n"
+ "#extension GL_ARB_sample_shading : enable\n"
+ "#extension GL_ARB_gpu_shader5 : enable\n"
"#define gvec4 %svec4\n"
"uniform %ssampler2DMS%s texSampler;\n"
"in %s texCoords;\n"
@@ -498,7 +531,6 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
"{\n"
"%s\n" /* sample_resolve */
"}\n",
- arb_sample_shading_extension_string,
vec4_prefix,
vec4_prefix,
sampler_array_suffix,
diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c
index 4800278a467..a9da0a21ba3 100644
--- a/src/mesa/drivers/common/meta_generate_mipmap.c
+++ b/src/mesa/drivers/common/meta_generate_mipmap.c
@@ -128,6 +128,8 @@ _mesa_meta_glsl_generate_mipmap_cleanup(struct gen_mipmap_state *mipmap)
mipmap->VAO = 0;
_mesa_DeleteBuffers(1, &mipmap->VBO);
mipmap->VBO = 0;
+ _mesa_DeleteSamplers(1, &mipmap->Sampler);
+ mipmap->Sampler = 0;
_mesa_meta_blit_shader_table_cleanup(&mipmap->shaders);
}
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index ed2654ef329..595903dd572 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -14,6 +14,7 @@ i965_compiler_FILES = \
brw_eu_emit.c \
brw_eu.h \
brw_eu_util.c \
+ brw_eu_validate.c \
brw_fs_builder.h \
brw_fs_channel_expressions.cpp \
brw_fs_cmod_propagation.cpp \
@@ -46,6 +47,7 @@ i965_compiler_FILES = \
brw_nir.h \
brw_nir.c \
brw_nir_analyze_boolean_resolves.c \
+ brw_nir_opt_peephole_ffma.c \
brw_nir_uniforms.cpp \
brw_packed_float.c \
brw_predicated_break.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c
index 508f1f019ae..d8226e0ca05 100644
--- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
+++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
@@ -88,7 +88,6 @@ reserve_hw_bt_space(struct brw_context *brw, unsigned bytes)
void
brw_upload_binding_table(struct brw_context *brw,
uint32_t packet_name,
- GLbitfield brw_new_binding_table,
const struct brw_stage_prog_data *prog_data,
struct brw_stage_state *stage_state)
{
@@ -127,7 +126,7 @@ brw_upload_binding_table(struct brw_context *brw,
}
}
- brw->ctx.NewDriverState |= brw_new_binding_table;
+ brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
if (brw->gen >= 7) {
if (brw->use_resource_streamer) {
@@ -159,7 +158,7 @@ brw_vs_upload_binding_table(struct brw_context *brw)
const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;
brw_upload_binding_table(brw,
_3DSTATE_BINDING_TABLE_POINTERS_VS,
- BRW_NEW_VS_BINDING_TABLE, prog_data,
+ prog_data,
&brw->vs.base);
}
@@ -183,7 +182,7 @@ brw_upload_wm_binding_table(struct brw_context *brw)
const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
brw_upload_binding_table(brw,
_3DSTATE_BINDING_TABLE_POINTERS_PS,
- BRW_NEW_PS_BINDING_TABLE, prog_data,
+ prog_data,
&brw->wm.base);
}
@@ -209,7 +208,7 @@ brw_gs_upload_binding_table(struct brw_context *brw)
const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
brw_upload_binding_table(brw,
_3DSTATE_BINDING_TABLE_POINTERS_GS,
- BRW_NEW_GS_BINDING_TABLE, prog_data,
+ prog_data,
&brw->gs.base);
}
@@ -406,10 +405,8 @@ const struct brw_tracked_state brw_binding_table_pointers = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH |
- BRW_NEW_GS_BINDING_TABLE |
- BRW_NEW_PS_BINDING_TABLE |
- BRW_NEW_STATE_BASE_ADDRESS |
- BRW_NEW_VS_BINDING_TABLE,
+ BRW_NEW_BINDING_TABLE_POINTERS |
+ BRW_NEW_STATE_BASE_ADDRESS,
},
.emit = gen4_upload_binding_table_pointers,
};
@@ -442,10 +439,8 @@ const struct brw_tracked_state gen6_binding_table_pointers = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH |
- BRW_NEW_GS_BINDING_TABLE |
- BRW_NEW_PS_BINDING_TABLE |
- BRW_NEW_STATE_BASE_ADDRESS |
- BRW_NEW_VS_BINDING_TABLE,
+ BRW_NEW_BINDING_TABLE_POINTERS |
+ BRW_NEW_STATE_BASE_ADDRESS,
},
.emit = gen6_upload_binding_table_pointers,
};
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index e5133ef5a3d..cd78af0dce4 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -146,6 +146,13 @@ struct brw_sampler_prog_key_data {
uint32_t compressed_multisample_layout_mask;
/**
+ * Whether this sampler is using 16x multisampling. If so fetching from
+ * this sampler will be handled with a different instruction, ld2dms_w
+ * instead of ld2dms.
+ */
+ uint32_t msaa_16;
+
+ /**
* For Sandybridge, which shader w/a we need for gather quirks.
*/
enum gen6_gather_sampler_wa gen6_gather_wa[MAX_SAMPLERS];
@@ -454,6 +461,8 @@ struct brw_vue_map {
int num_slots;
};
+void brw_print_vue_map(FILE *fp, const struct brw_vue_map *vue_map);
+
/**
* Convert a VUE slot number into a byte offset within the VUE.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 3b125448e14..ac6045dbba9 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -84,6 +84,12 @@ brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
switch (brw->gen) {
case 9:
+ samples[0] = 16;
+ samples[1] = 8;
+ samples[2] = 4;
+ samples[3] = 2;
+ return 4;
+
case 8:
samples[0] = 8;
samples[1] = 4;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index c83f47bdff7..4b2db61c758 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -184,9 +184,7 @@ enum brw_state_id {
BRW_STATE_CONTEXT,
BRW_STATE_PSP,
BRW_STATE_SURFACES,
- BRW_STATE_VS_BINDING_TABLE,
- BRW_STATE_GS_BINDING_TABLE,
- BRW_STATE_PS_BINDING_TABLE,
+ BRW_STATE_BINDING_TABLE_POINTERS,
BRW_STATE_INDICES,
BRW_STATE_VERTICES,
BRW_STATE_BATCH,
@@ -261,9 +259,7 @@ enum brw_state_id {
#define BRW_NEW_CONTEXT (1ull << BRW_STATE_CONTEXT)
#define BRW_NEW_PSP (1ull << BRW_STATE_PSP)
#define BRW_NEW_SURFACES (1ull << BRW_STATE_SURFACES)
-#define BRW_NEW_VS_BINDING_TABLE (1ull << BRW_STATE_VS_BINDING_TABLE)
-#define BRW_NEW_GS_BINDING_TABLE (1ull << BRW_STATE_GS_BINDING_TABLE)
-#define BRW_NEW_PS_BINDING_TABLE (1ull << BRW_STATE_PS_BINDING_TABLE)
+#define BRW_NEW_BINDING_TABLE_POINTERS (1ull << BRW_STATE_BINDING_TABLE_POINTERS)
#define BRW_NEW_INDICES (1ull << BRW_STATE_INDICES)
#define BRW_NEW_VERTICES (1ull << BRW_STATE_VERTICES)
/**
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 754da9fc3da..3ad90da8b2f 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -79,7 +79,9 @@
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x16
-#endif
+#define _3DPRIM_PATCHLIST(n) ({ assert(n > 0 && n <= 32); 0x20 + (n - 1); })
+
+#endif /* bdw_pack.h */
/* We use this offset to be able to pass native primitive types in struct
* _mesa_prim::mode. Native primitive types are BRW_PRIM_OFFSET +
@@ -840,43 +842,62 @@ enum PACKED brw_horizontal_stride {
enum opcode {
/* These are the actual hardware opcodes. */
+ BRW_OPCODE_ILLEGAL = 0,
BRW_OPCODE_MOV = 1,
BRW_OPCODE_SEL = 2,
+ BRW_OPCODE_MOVI = 3, /**< G45+ */
BRW_OPCODE_NOT = 4,
BRW_OPCODE_AND = 5,
BRW_OPCODE_OR = 6,
BRW_OPCODE_XOR = 7,
BRW_OPCODE_SHR = 8,
BRW_OPCODE_SHL = 9,
+ // BRW_OPCODE_DIM = 10, /**< Gen7.5 only */ /* Reused */
+ // BRW_OPCODE_SMOV = 10, /**< Gen8+ */ /* Reused */
+ /* Reserved - 11 */
BRW_OPCODE_ASR = 12,
+ /* Reserved - 13-15 */
BRW_OPCODE_CMP = 16,
BRW_OPCODE_CMPN = 17,
BRW_OPCODE_CSEL = 18, /**< Gen8+ */
BRW_OPCODE_F32TO16 = 19, /**< Gen7 only */
BRW_OPCODE_F16TO32 = 20, /**< Gen7 only */
+ /* Reserved - 21-22 */
BRW_OPCODE_BFREV = 23, /**< Gen7+ */
BRW_OPCODE_BFE = 24, /**< Gen7+ */
BRW_OPCODE_BFI1 = 25, /**< Gen7+ */
BRW_OPCODE_BFI2 = 26, /**< Gen7+ */
+ /* Reserved - 27-31 */
BRW_OPCODE_JMPI = 32,
+ // BRW_OPCODE_BRD = 33, /**< Gen7+ */
BRW_OPCODE_IF = 34,
- BRW_OPCODE_IFF = 35, /**< Pre-Gen6 */
+ BRW_OPCODE_IFF = 35, /**< Pre-Gen6 */ /* Reused */
+ // BRW_OPCODE_BRC = 35, /**< Gen7+ */ /* Reused */
BRW_OPCODE_ELSE = 36,
BRW_OPCODE_ENDIF = 37,
- BRW_OPCODE_DO = 38,
+ BRW_OPCODE_DO = 38, /**< Pre-Gen6 */ /* Reused */
+ // BRW_OPCODE_CASE = 38, /**< Gen6 only */ /* Reused */
BRW_OPCODE_WHILE = 39,
BRW_OPCODE_BREAK = 40,
BRW_OPCODE_CONTINUE = 41,
BRW_OPCODE_HALT = 42,
- BRW_OPCODE_MSAVE = 44, /**< Pre-Gen6 */
- BRW_OPCODE_MRESTORE = 45, /**< Pre-Gen6 */
- BRW_OPCODE_PUSH = 46, /**< Pre-Gen6 */
- BRW_OPCODE_GOTO = 46, /**< Gen8+ */
- BRW_OPCODE_POP = 47, /**< Pre-Gen6 */
+ // BRW_OPCODE_CALLA = 43, /**< Gen7.5+ */
+ // BRW_OPCODE_MSAVE = 44, /**< Pre-Gen6 */ /* Reused */
+ // BRW_OPCODE_CALL = 44, /**< Gen6+ */ /* Reused */
+ // BRW_OPCODE_MREST = 45, /**< Pre-Gen6 */ /* Reused */
+ // BRW_OPCODE_RET = 45, /**< Gen6+ */ /* Reused */
+ // BRW_OPCODE_PUSH = 46, /**< Pre-Gen6 */ /* Reused */
+ // BRW_OPCODE_FORK = 46, /**< Gen6 only */ /* Reused */
+ // BRW_OPCODE_GOTO = 46, /**< Gen8+ */ /* Reused */
+ // BRW_OPCODE_POP = 47, /**< Pre-Gen6 */
BRW_OPCODE_WAIT = 48,
BRW_OPCODE_SEND = 49,
BRW_OPCODE_SENDC = 50,
+ BRW_OPCODE_SENDS = 51, /**< Gen9+ */
+ BRW_OPCODE_SENDSC = 52, /**< Gen9+ */
+ /* Reserved 53-55 */
BRW_OPCODE_MATH = 56, /**< Gen6+ */
+ /* Reserved 57-63 */
BRW_OPCODE_ADD = 64,
BRW_OPCODE_MUL = 65,
BRW_OPCODE_AVG = 66,
@@ -895,16 +916,21 @@ enum opcode {
BRW_OPCODE_SUBB = 79, /**< Gen7+ */
BRW_OPCODE_SAD2 = 80,
BRW_OPCODE_SADA2 = 81,
+ /* Reserved 82-83 */
BRW_OPCODE_DP4 = 84,
BRW_OPCODE_DPH = 85,
BRW_OPCODE_DP3 = 86,
BRW_OPCODE_DP2 = 87,
+ /* Reserved 88 */
BRW_OPCODE_LINE = 89,
BRW_OPCODE_PLN = 90, /**< G45+ */
BRW_OPCODE_MAD = 91, /**< Gen6+ */
BRW_OPCODE_LRP = 92, /**< Gen6+ */
+ // BRW_OPCODE_MADM = 93, /**< Gen8+ */
+ /* Reserved 94-124 */
BRW_OPCODE_NENOP = 125, /**< G45 only */
BRW_OPCODE_NOP = 126,
+ /* Reserved 127 */
/* These are compiler backend opcodes that get translated into other
* instructions.
@@ -966,6 +992,8 @@ enum opcode {
FS_OPCODE_TXB_LOGICAL,
SHADER_OPCODE_TXF_CMS,
SHADER_OPCODE_TXF_CMS_LOGICAL,
+ SHADER_OPCODE_TXF_CMS_W,
+ SHADER_OPCODE_TXF_CMS_W_LOGICAL,
SHADER_OPCODE_TXF_UMS,
SHADER_OPCODE_TXF_UMS_LOGICAL,
SHADER_OPCODE_TXF_MCS,
@@ -1029,13 +1057,10 @@ enum opcode {
SHADER_OPCODE_GEN7_SCRATCH_READ,
/**
- * Gen8+ SIMD8 URB Read message.
- *
- * Source 0: The header register, containing URB handles (g1).
- *
- * Currently only supports constant offsets, in inst->offset.
+ * Gen8+ SIMD8 URB Read messages.
*/
SHADER_OPCODE_URB_READ_SIMD8,
+ SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT,
SHADER_OPCODE_URB_WRITE_SIMD8,
SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT,
@@ -1373,10 +1398,23 @@ enum PACKED brw_predicate {
BRW_PREDICATE_ALIGN16_ALL4H = 7,
};
-#define BRW_ARCHITECTURE_REGISTER_FILE 0
-#define BRW_GENERAL_REGISTER_FILE 1
-#define BRW_MESSAGE_REGISTER_FILE 2
-#define BRW_IMMEDIATE_VALUE 3
+enum PACKED brw_reg_file {
+ BRW_ARCHITECTURE_REGISTER_FILE = 0,
+ BRW_GENERAL_REGISTER_FILE = 1,
+ BRW_MESSAGE_REGISTER_FILE = 2,
+ BRW_IMMEDIATE_VALUE = 3,
+
+ ARF = BRW_ARCHITECTURE_REGISTER_FILE,
+ FIXED_GRF = BRW_GENERAL_REGISTER_FILE,
+ MRF = BRW_MESSAGE_REGISTER_FILE,
+ IMM = BRW_IMMEDIATE_VALUE,
+
+ /* These are not hardware values */
+ VGRF,
+ ATTR,
+ UNIFORM, /* prog_data->params[reg] */
+ BAD_FILE,
+};
#define BRW_HW_REG_TYPE_UD 0
#define BRW_HW_REG_TYPE_D 1
@@ -1541,6 +1579,7 @@ enum brw_message_target {
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO 17
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18
#define HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE 20
+#define GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W 28
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS 29
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS 30
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS 31
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c
index 6372fb5c55f..541c7958d5e 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -337,6 +337,15 @@ static const struct brw_device_info brw_device_info_skl_gt3 = {
static const struct brw_device_info brw_device_info_skl_gt4 = {
GEN9_FEATURES, .gt = 4,
+ /* From the "L3 Allocation and Programming" documentation:
+ *
+ * "URB is limited to 1008KB due to programming restrictions. This is not a
+ * restriction of the L3 implementation, but of the FF and other clients.
+ * Therefore, in a GT4 implementation it is possible for the programmed
+ * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
+ * only 1008KB of this will be used."
+ */
+ .urb.size = 1008 / 3,
};
static const struct brw_device_info brw_device_info_bxt = {
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index df747107188..650bdeea344 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -34,6 +34,7 @@
const struct opcode_desc opcode_descs[128] = {
[BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_MOVI] = { .name = "movi", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
@@ -83,23 +84,26 @@ const struct opcode_desc opcode_descs[128] = {
[BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_SENDS] = { .name = "sends", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SENDSC] = { .name = "sendsc", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_ILLEGAL] = { .name = "illegal", .nsrc = 0, .ndst = 0 },
[BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
[BRW_OPCODE_NENOP] = { .name = "nenop", .nsrc = 0, .ndst = 0 },
[BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 0, .ndst = 0 },
- [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
- [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
- [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_IF] = { .name = "if", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 0, .ndst = 0 },
+ // [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+ // [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+ // [BRW_OPCODE_MREST] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+ // [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
- [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 0, .ndst = 0 },
};
static bool
@@ -137,8 +141,8 @@ has_branch_ctrl(const struct brw_device_info *devinfo, enum opcode opcode)
return false;
return opcode == BRW_OPCODE_IF ||
- opcode == BRW_OPCODE_ELSE ||
- opcode == BRW_OPCODE_GOTO;
+ opcode == BRW_OPCODE_ELSE;
+ /* opcode == BRW_OPCODE_GOTO; */
}
static bool
@@ -622,6 +626,7 @@ static const char *const gen5_sampler_msg_type[] = {
[GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO] = "gather4_po",
[GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C] = "gather4_po_c",
[HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE] = "sample_d_c",
+ [GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W] = "ld2dms_w",
[GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS] = "ld_mcs",
[GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS] = "ld2dms",
[GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS] = "ld2dss",
@@ -720,7 +725,7 @@ reg(FILE *file, unsigned _reg_file, unsigned _reg_nr)
/* Clear the Compr4 instruction compression bit. */
if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
- _reg_nr &= ~(1 << 7);
+ _reg_nr &= ~BRW_MRF_COMPR4;
if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
switch (_reg_nr & 0xf0) {
@@ -1644,7 +1649,7 @@ brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo,
if (brw_inst_qtr_control(devinfo, inst) == BRW_COMPRESSION_COMPRESSED &&
opcode_descs[opcode].ndst > 0 &&
brw_inst_dst_reg_file(devinfo, inst) == BRW_MESSAGE_REGISTER_FILE &&
- brw_inst_dst_da_reg_nr(devinfo, inst) & (1 << 7)) {
+ brw_inst_dst_da_reg_nr(devinfo, inst) & BRW_MRF_COMPR4) {
format(file, " compr4");
} else {
err |= control(file, "compression control", compr_ctrl,
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 61683c81b79..a2eaf8fb1e0 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -111,9 +111,16 @@ brw_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
static void
gen6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
{
+ const struct gl_context *ctx = &brw->ctx;
+ uint32_t hw_prim;
+
DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode));
- const uint32_t hw_prim = get_hw_prim_for_gl_prim(prim->mode);
+ if (prim->mode == GL_PATCHES)
+ hw_prim = _3DPRIM_PATCHLIST(ctx->TessCtrlProgram.patch_vertices);
+ else
+ hw_prim = get_hw_prim_for_gl_prim(prim->mode);
+
if (hw_prim != brw->primitive) {
brw->primitive = hw_prim;
brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE;
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 0ac1ad9378b..829e39330f2 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -522,6 +522,10 @@ bool brw_try_compact_instruction(const struct brw_device_info *devinfo,
void brw_debug_compact_uncompact(const struct brw_device_info *devinfo,
brw_inst *orig, brw_inst *uncompacted);
+/* brw_eu_validate.c */
+bool brw_validate_instructions(const struct brw_codegen *p, int start_offset,
+ struct annotation_info *annotation);
+
static inline int
next_offset(const struct brw_device_info *devinfo, void *store, int offset)
{
@@ -533,6 +537,12 @@ next_offset(const struct brw_device_info *devinfo, void *store, int offset)
return offset + 16;
}
+static inline bool
+is_3src(enum opcode opcode)
+{
+ return opcode_descs[opcode].nsrc == 3;
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index 07ace6bfbcb..bca8a84154f 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -954,13 +954,6 @@ is_compactable_immediate(unsigned imm)
return imm == 0 || imm == 0xfffff000;
}
-/* Returns whether an opcode takes three sources. */
-static bool
-is_3src(uint32_t op)
-{
- return opcode_descs[op].nsrc == 3;
-}
-
/**
* Tries to compact instruction src into dst.
*
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index a6fbb542919..da1ddfddb50 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -92,7 +92,7 @@ gen7_convert_mrf_to_grf(struct brw_codegen *p, struct brw_reg *reg)
*/
unsigned
brw_reg_type_to_hw_type(const struct brw_device_info *devinfo,
- enum brw_reg_type type, unsigned file)
+ enum brw_reg_type type, enum brw_reg_file file)
{
if (file == BRW_IMMEDIATE_VALUE) {
static const int imm_hw_types[] = {
@@ -147,7 +147,7 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
const struct brw_device_info *devinfo = p->devinfo;
if (dest.file == BRW_MESSAGE_REGISTER_FILE)
- assert((dest.nr & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
+ assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
assert(dest.nr < 128);
@@ -169,10 +169,10 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
} else {
brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
- brw_inst_set_da16_writemask(devinfo, inst, dest.dw1.bits.writemask);
+ brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
if (dest.file == BRW_GENERAL_REGISTER_FILE ||
dest.file == BRW_MESSAGE_REGISTER_FILE) {
- assert(dest.dw1.bits.writemask != 0);
+ assert(dest.writemask != 0);
}
/* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
* Although Dst.HorzStride is a don't care for Align16, HW needs
@@ -187,13 +187,13 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
*/
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
- dest.dw1.bits.indirect_offset);
+ dest.indirect_offset);
if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
dest.hstride = BRW_HORIZONTAL_STRIDE_1;
brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
} else {
brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
- dest.dw1.bits.indirect_offset);
+ dest.indirect_offset);
/* even ignored in da16, still need to set as '01' */
brw_inst_set_dst_hstride(devinfo, inst, 1);
}
@@ -243,7 +243,7 @@ validate_reg(const struct brw_device_info *devinfo,
*/
if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
reg.nr == BRW_ARF_ACCUMULATOR)
- assert(reg.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
+ assert(reg.swizzle == BRW_SWIZZLE_XYZW);
assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
hstride = hstride_for_reg[reg.hstride];
@@ -311,7 +311,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
const struct brw_device_info *devinfo = p->devinfo;
if (reg.file == BRW_MESSAGE_REGISTER_FILE)
- assert((reg.nr & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
+ assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
else if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
@@ -338,7 +338,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);
if (reg.file == BRW_IMMEDIATE_VALUE) {
- brw_inst_set_imm_ud(devinfo, inst, reg.dw1.ud);
+ brw_inst_set_imm_ud(devinfo, inst, reg.ud);
/* The Bspec's section titled "Non-present Operands" claims that if src0
* is an immediate that src1's type must be the same as that of src0.
@@ -408,9 +408,9 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr);
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
- brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset);
+ brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
} else {
- brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset);
+ brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
}
}
@@ -427,13 +427,13 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
}
} else {
brw_inst_set_src0_da16_swiz_x(devinfo, inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X));
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
brw_inst_set_src0_da16_swiz_y(devinfo, inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y));
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
brw_inst_set_src0_da16_swiz_z(devinfo, inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z));
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
brw_inst_set_src0_da16_swiz_w(devinfo, inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W));
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
/* This is an oddity of the fact we're using the same
* descriptions for registers in align_16 as align_1:
@@ -479,7 +479,7 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);
if (reg.file == BRW_IMMEDIATE_VALUE) {
- brw_inst_set_imm_ud(devinfo, inst, reg.dw1.ud);
+ brw_inst_set_imm_ud(devinfo, inst, reg.ud);
} else {
/* This is a hardware restriction, which may or may not be lifted
* in the future:
@@ -507,13 +507,13 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
}
} else {
brw_inst_set_src1_da16_swiz_x(devinfo, inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X));
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
brw_inst_set_src1_da16_swiz_y(devinfo, inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y));
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
brw_inst_set_src1_da16_swiz_z(devinfo, inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z));
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
brw_inst_set_src1_da16_swiz_w(devinfo, inst,
- BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W));
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
/* This is an oddity of the fact we're using the same
* descriptions for registers in align_16 as align_1:
@@ -848,8 +848,8 @@ static int
get_3src_subreg_nr(struct brw_reg reg)
{
if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
- assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
- return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
+ assert(brw_is_single_value_swizzle(reg.swizzle));
+ return reg.subnr / 4 + BRW_GET_SWZ(reg.swizzle, 0);
} else {
return reg.subnr / 4;
}
@@ -879,12 +879,12 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
}
brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
brw_inst_set_3src_dst_subreg_nr(devinfo, inst, dest.subnr / 16);
- brw_inst_set_3src_dst_writemask(devinfo, inst, dest.dw1.bits.writemask);
+ brw_inst_set_3src_dst_writemask(devinfo, inst, dest.writemask);
assert(src0.file == BRW_GENERAL_REGISTER_FILE);
assert(src0.address_mode == BRW_ADDRESS_DIRECT);
assert(src0.nr < 128);
- brw_inst_set_3src_src0_swizzle(devinfo, inst, src0.dw1.bits.swizzle);
+ brw_inst_set_3src_src0_swizzle(devinfo, inst, src0.swizzle);
brw_inst_set_3src_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0));
brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
@@ -895,7 +895,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
assert(src1.file == BRW_GENERAL_REGISTER_FILE);
assert(src1.address_mode == BRW_ADDRESS_DIRECT);
assert(src1.nr < 128);
- brw_inst_set_3src_src1_swizzle(devinfo, inst, src1.dw1.bits.swizzle);
+ brw_inst_set_3src_src1_swizzle(devinfo, inst, src1.swizzle);
brw_inst_set_3src_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1));
brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
@@ -906,7 +906,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
assert(src2.file == BRW_GENERAL_REGISTER_FILE);
assert(src2.address_mode == BRW_ADDRESS_DIRECT);
assert(src2.nr < 128);
- brw_inst_set_3src_src2_swizzle(devinfo, inst, src2.dw1.bits.swizzle);
+ brw_inst_set_3src_src2_swizzle(devinfo, inst, src2.swizzle);
brw_inst_set_3src_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2));
brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
@@ -2426,7 +2426,7 @@ void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
const int sampler_state_size = 16; /* 16 bytes */
- uint32_t sampler = sampler_index.dw1.ud;
+ uint32_t sampler = sampler_index.ud;
if (sampler >= 16) {
assert(devinfo->is_haswell || devinfo->gen >= 8);
@@ -2581,7 +2581,7 @@ brw_send_indirect_surface_message(struct brw_codegen *p,
*/
insn = brw_AND(p, addr,
suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)),
- BRW_GET_SWZ(surface.dw1.bits.swizzle, 0)),
+ BRW_GET_SWZ(surface.swizzle, 0)),
brw_imm_ud(0xff));
brw_pop_insn_state(p);
@@ -3336,7 +3336,7 @@ brw_broadcast(struct brw_codegen *p,
* We will typically not get here if the optimizer is doing its job, but
* asserting would be mean.
*/
- const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.dw1.ud : 0;
+ const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0;
brw_MOV(p, dst,
(align1 ? stride(suboffset(src, i), 0, 1, 0) :
stride(suboffset(src, 4 * i), 0, 4, 1)));
diff --git a/src/mesa/drivers/dri/i965/brw_eu_validate.c b/src/mesa/drivers/dri/i965/brw_eu_validate.c
new file mode 100644
index 00000000000..2de2ea1babc
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_eu_validate.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file brw_eu_validate.c
+ *
+ * This file implements a pass that validates shader assembly.
+ */
+
+#include "brw_eu.h"
+
+/* We're going to do lots of string concatenation, so this should help. */
+struct string {
+ char *str;
+ size_t len;
+};
+
+static void
+cat(struct string *dest, const struct string src)
+{
+ dest->str = realloc(dest->str, dest->len + src.len + 1);
+ memcpy(dest->str + dest->len, src.str, src.len);
+ dest->str[dest->len + src.len] = '\0';
+ dest->len = dest->len + src.len;
+}
+#define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)})
+
+#define error(str) "\tERROR: " str "\n"
+
+#define ERROR_IF(cond, msg) \
+ do { \
+ if (cond) { \
+ CAT(error_msg, error(msg)); \
+ valid = false; \
+ } \
+ } while(0)
+
+static bool
+src0_is_null(const struct brw_device_info *devinfo, const brw_inst *inst)
+{
+ return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
+ brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
+}
+
+static bool
+src1_is_null(const struct brw_device_info *devinfo, const brw_inst *inst)
+{
+ return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
+ brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
+}
+
+enum gen {
+ GEN4 = (1 << 0),
+ GEN45 = (1 << 1),
+ GEN5 = (1 << 2),
+ GEN6 = (1 << 3),
+ GEN7 = (1 << 4),
+ GEN75 = (1 << 5),
+ GEN8 = (1 << 6),
+ GEN9 = (1 << 7),
+ GEN_ALL = ~0
+};
+
+#define GEN_GE(gen) (~((gen) - 1) | (gen)) /* mask of gen's bit and every higher bit */
+#define GEN_LE(gen) (((gen) - 1) | (gen)) /* mask of gen's bit and every lower bit */
+
+struct inst_info {
+ enum gen gen;
+};
+
+static const struct inst_info inst_info[128] = {
+ [BRW_OPCODE_ILLEGAL] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_MOV] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_SEL] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_MOVI] = {
+ .gen = GEN_GE(GEN45),
+ },
+ [BRW_OPCODE_NOT] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_AND] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_OR] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_XOR] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_SHR] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_SHL] = {
+ .gen = GEN_ALL,
+ },
+ /* BRW_OPCODE_DIM / BRW_OPCODE_SMOV */
+ /* Reserved - 11 */
+ [BRW_OPCODE_ASR] = {
+ .gen = GEN_ALL,
+ },
+ /* Reserved - 13-15 */
+ [BRW_OPCODE_CMP] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_CMPN] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_CSEL] = {
+ .gen = GEN_GE(GEN8),
+ },
+ [BRW_OPCODE_F32TO16] = {
+ .gen = GEN7 | GEN75,
+ },
+ [BRW_OPCODE_F16TO32] = {
+ .gen = GEN7 | GEN75,
+ },
+ /* Reserved - 21-22 */
+ [BRW_OPCODE_BFREV] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_BFE] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_BFI1] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_BFI2] = {
+ .gen = GEN_GE(GEN7),
+ },
+ /* Reserved - 27-31 */
+ [BRW_OPCODE_JMPI] = {
+ .gen = GEN_ALL,
+ },
+ /* BRW_OPCODE_BRD */
+ [BRW_OPCODE_IF] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_IFF] = { /* also BRW_OPCODE_BRC */
+ .gen = GEN_LE(GEN5),
+ },
+ [BRW_OPCODE_ELSE] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_ENDIF] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_DO] = { /* also BRW_OPCODE_CASE */
+ .gen = GEN_LE(GEN5),
+ },
+ [BRW_OPCODE_WHILE] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_BREAK] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_CONTINUE] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_HALT] = {
+ .gen = GEN_ALL,
+ },
+ /* BRW_OPCODE_CALLA */
+ /* BRW_OPCODE_MSAVE / BRW_OPCODE_CALL */
+ /* BRW_OPCODE_MREST / BRW_OPCODE_RET */
+ /* BRW_OPCODE_PUSH / BRW_OPCODE_FORK / BRW_OPCODE_GOTO */
+ /* BRW_OPCODE_POP */
+ [BRW_OPCODE_WAIT] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_SEND] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_SENDC] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_SENDS] = {
+ .gen = GEN_GE(GEN9),
+ },
+ [BRW_OPCODE_SENDSC] = {
+ .gen = GEN_GE(GEN9),
+ },
+ /* Reserved 53-55 */
+ [BRW_OPCODE_MATH] = {
+ .gen = GEN_GE(GEN6),
+ },
+ /* Reserved 57-63 */
+ [BRW_OPCODE_ADD] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_MUL] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_AVG] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_FRC] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_RNDU] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_RNDD] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_RNDE] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_RNDZ] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_MAC] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_MACH] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_LZD] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_FBH] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_FBL] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_CBIT] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_ADDC] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_SUBB] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_SAD2] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_SADA2] = {
+ .gen = GEN_ALL,
+ },
+ /* Reserved 82-83 */
+ [BRW_OPCODE_DP4] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_DPH] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_DP3] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_DP2] = {
+ .gen = GEN_ALL,
+ },
+ /* Reserved 88 */
+ [BRW_OPCODE_LINE] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_PLN] = {
+ .gen = GEN_GE(GEN45),
+ },
+ [BRW_OPCODE_MAD] = {
+ .gen = GEN_GE(GEN6),
+ },
+ [BRW_OPCODE_LRP] = {
+ .gen = GEN_GE(GEN6),
+ },
+ /* Reserved 93-124 */
+ /* BRW_OPCODE_NENOP */
+ [BRW_OPCODE_NOP] = {
+ .gen = GEN_ALL,
+ },
+};
+
+static unsigned
+num_sources_from_inst(const struct brw_device_info *devinfo,
+ const brw_inst *inst)
+{
+ unsigned math_function;
+
+ if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {
+ math_function = brw_inst_math_function(devinfo, inst);
+ } else if (devinfo->gen < 6 &&
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) {
+ if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) {
+ math_function = brw_inst_math_msg_function(devinfo, inst);
+ } else {
+ /* Send instructions are allowed to have null sources since they use
+ * the base_mrf field to specify which message register is the source.
+ */
+ return 0;
+ }
+ } else {
+ return opcode_descs[brw_inst_opcode(devinfo, inst)].nsrc;
+ }
+
+ switch (math_function) {
+ case BRW_MATH_FUNCTION_INV:
+ case BRW_MATH_FUNCTION_LOG:
+ case BRW_MATH_FUNCTION_EXP:
+ case BRW_MATH_FUNCTION_SQRT:
+ case BRW_MATH_FUNCTION_RSQ:
+ case BRW_MATH_FUNCTION_SIN:
+ case BRW_MATH_FUNCTION_COS:
+ case BRW_MATH_FUNCTION_SINCOS:
+ case GEN8_MATH_FUNCTION_INVM:
+ case GEN8_MATH_FUNCTION_RSQRTM:
+ return 1;
+ case BRW_MATH_FUNCTION_FDIV:
+ case BRW_MATH_FUNCTION_POW:
+ case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
+ case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
+ case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
+ return 2;
+ default:
+ unreachable("not reached");
+ }
+}
+
+static enum gen
+gen_from_devinfo(const struct brw_device_info *devinfo)
+{
+ switch (devinfo->gen) {
+ case 4: return devinfo->is_g4x ? GEN45 : GEN4;
+ case 5: return GEN5;
+ case 6: return GEN6;
+ case 7: return devinfo->is_haswell ? GEN75 : GEN7;
+ case 8: return GEN8;
+ case 9: return GEN9;
+ default:
+ unreachable("not reached");
+ }
+}
+
+static bool
+is_unsupported_inst(const struct brw_device_info *devinfo,
+ const brw_inst *inst)
+{
+ enum gen gen = gen_from_devinfo(devinfo);
+ return (inst_info[brw_inst_opcode(devinfo, inst)].gen & gen) == 0;
+}
+
+bool
+brw_validate_instructions(const struct brw_codegen *p, int start_offset,
+ struct annotation_info *annotation)
+{
+ const struct brw_device_info *devinfo = p->devinfo;
+ const void *store = p->store + start_offset / 16;
+ bool valid = true;
+
+ for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
+ src_offset += sizeof(brw_inst)) {
+ struct string error_msg = { .str = NULL, .len = 0 };
+ const brw_inst *inst = store + src_offset;
+
+ switch (num_sources_from_inst(devinfo, inst)) {
+ case 3:
+ /* Nothing to test. 3-src instructions can only have GRF sources, and
+ * there's no bit to control the file.
+ */
+ break;
+ case 2:
+ ERROR_IF(src1_is_null(devinfo, inst), "src1 is null");
+ /* fallthrough */
+ case 1:
+ ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");
+ break;
+ case 0:
+ default:
+ break;
+ }
+
+ ERROR_IF(is_unsupported_inst(devinfo, inst),
+ "Instruction not supported on this Gen");
+
+ if (error_msg.str && annotation) {
+ annotation_insert_error(annotation, src_offset, error_msg.str);
+ }
+ free(error_msg.str);
+ }
+
+ return valid;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c b/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c
index 50bda619f55..830fc6e41df 100644
--- a/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c
@@ -436,7 +436,7 @@ gen6_sol_program(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key,
vertex_slot.nr += slot / 2;
vertex_slot.subnr = (slot % 2) * 16;
/* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
- vertex_slot.dw1.bits.swizzle = varying == VARYING_SLOT_PSIZ
+ vertex_slot.swizzle = varying == VARYING_SLOT_PSIZ
? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_MOV(p, stride(c->reg.header, 4, 4, 1),
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e218a85a363..3bec7285ef5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -43,6 +43,7 @@
#include "brw_wm.h"
#include "brw_fs.h"
#include "brw_cs.h"
+#include "brw_vec4_gs_visitor.h"
#include "brw_cfg.h"
#include "brw_dead_control_flow.h"
#include "main/uniforms.h"
@@ -75,8 +76,9 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
/* This will be the case for almost all instructions. */
switch (dst.file) {
- case GRF:
- case HW_REG:
+ case VGRF:
+ case ARF:
+ case FIXED_GRF:
case MRF:
case ATTR:
this->regs_written = DIV_ROUND_UP(dst.component_size(exec_size),
@@ -203,7 +205,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
int regs_written = 4 * (bld.dispatch_width() / 8) * scale;
- fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written), dst.type);
+ fs_reg vec4_result = fs_reg(VGRF, alloc.allocate(regs_written), dst.type);
fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset);
inst->regs_written = regs_written;
@@ -232,7 +234,7 @@ fs_visitor::DEP_RESOLVE_MOV(const fs_builder &bld, int grf)
const fs_builder ubld = bld.annotate("send dependency resolve")
.half(0);
- ubld.MOV(ubld.null_reg_f(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F));
+ ubld.MOV(ubld.null_reg_f(), fs_reg(VGRF, grf, BRW_REGISTER_TYPE_F));
}
bool
@@ -283,14 +285,15 @@ fs_inst::is_send_from_grf() const
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case SHADER_OPCODE_URB_READ_SIMD8:
+ case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
return true;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
- return src[1].file == GRF;
+ return src[1].file == VGRF;
case FS_OPCODE_FB_WRITE:
- return src[0].file == GRF;
+ return src[0].file == VGRF;
default:
if (is_tex())
- return src[0].file == GRF;
+ return src[0].file == VGRF;
return false;
}
@@ -303,10 +306,10 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
return false;
fs_reg reg = this->src[0];
- if (reg.file != GRF || reg.reg_offset != 0 || reg.stride == 0)
+ if (reg.file != VGRF || reg.reg_offset != 0 || reg.stride == 0)
return false;
- if (grf_alloc.sizes[reg.reg] != this->regs_written)
+ if (grf_alloc.sizes[reg.nr] != this->regs_written)
return false;
for (int i = 0; i < this->sources; i++) {
@@ -378,7 +381,7 @@ fs_reg::fs_reg(float f)
this->file = IMM;
this->type = BRW_REGISTER_TYPE_F;
this->stride = 0;
- this->fixed_hw_reg.dw1.f = f;
+ this->f = f;
}
/** Immediate value constructor. */
@@ -388,7 +391,7 @@ fs_reg::fs_reg(int32_t i)
this->file = IMM;
this->type = BRW_REGISTER_TYPE_D;
this->stride = 0;
- this->fixed_hw_reg.dw1.d = i;
+ this->d = i;
}
/** Immediate value constructor. */
@@ -398,7 +401,7 @@ fs_reg::fs_reg(uint32_t u)
this->file = IMM;
this->type = BRW_REGISTER_TYPE_UD;
this->stride = 0;
- this->fixed_hw_reg.dw1.ud = u;
+ this->ud = u;
}
/** Vector float immediate value constructor. */
@@ -407,7 +410,7 @@ fs_reg::fs_reg(uint8_t vf[4])
init();
this->file = IMM;
this->type = BRW_REGISTER_TYPE_VF;
- memcpy(&this->fixed_hw_reg.dw1.ud, vf, sizeof(unsigned));
+ memcpy(&this->ud, vf, sizeof(unsigned));
}
/** Vector float immediate value constructor. */
@@ -416,42 +419,38 @@ fs_reg::fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
init();
this->file = IMM;
this->type = BRW_REGISTER_TYPE_VF;
- this->fixed_hw_reg.dw1.ud = (vf0 << 0) |
- (vf1 << 8) |
- (vf2 << 16) |
- (vf3 << 24);
+ this->ud = (vf0 << 0) | (vf1 << 8) | (vf2 << 16) | (vf3 << 24);
}
-/** Fixed brw_reg. */
-fs_reg::fs_reg(struct brw_reg fixed_hw_reg)
+fs_reg::fs_reg(struct brw_reg reg) :
+ backend_reg(reg)
{
- init();
- this->file = HW_REG;
- this->fixed_hw_reg = fixed_hw_reg;
- this->type = fixed_hw_reg.type;
+ this->reg_offset = 0;
+ this->subreg_offset = 0;
+ this->reladdr = NULL;
+ this->stride = 1;
+ if (this->file == IMM &&
+ (this->type != BRW_REGISTER_TYPE_V &&
+ this->type != BRW_REGISTER_TYPE_UV &&
+ this->type != BRW_REGISTER_TYPE_VF)) {
+ this->stride = 0;
+ }
}
bool
fs_reg::equals(const fs_reg &r) const
{
- return (file == r.file &&
- reg == r.reg &&
+ return (memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0 &&
reg_offset == r.reg_offset &&
subreg_offset == r.subreg_offset &&
- type == r.type &&
- negate == r.negate &&
- abs == r.abs &&
!reladdr && !r.reladdr &&
- ((file != HW_REG && file != IMM) ||
- memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
- sizeof(fixed_hw_reg)) == 0) &&
stride == r.stride);
}
fs_reg &
fs_reg::set_smear(unsigned subreg)
{
- assert(file != HW_REG && file != IMM);
+ assert(file != ARF && file != FIXED_GRF && file != IMM);
subreg_offset = subreg * type_sz(type);
stride = 0;
return *this;
@@ -466,9 +465,9 @@ fs_reg::is_contiguous() const
unsigned
fs_reg::component_size(unsigned width) const
{
- const unsigned stride = (file != HW_REG ? this->stride :
- fixed_hw_reg.hstride == 0 ? 0 :
- 1 << (fixed_hw_reg.hstride - 1));
+ const unsigned stride = ((file != ARF && file != FIXED_GRF) ? this->stride :
+ hstride == 0 ? 0 :
+ 1 << (hstride - 1));
return MAX2(width * stride, 1) * type_sz(type);
}
@@ -514,6 +513,19 @@ type_size_scalar(const struct glsl_type *type)
}
/**
+ * Returns the number of scalar components needed to store type, assuming
+ * that vectors are padded out to vec4.
+ *
+ * This has the packing rules of type_size_vec4(), but counts components
+ * similar to type_size_scalar().
+ */
+extern "C" int
+type_size_vec4_times_4(const struct glsl_type *type)
+{
+ return 4 * type_size_vec4(type);
+}
+
+/**
* Create a MOV to read the timestamp register.
*
* The caller is responsible for emitting the MOV. The return value is
@@ -529,7 +541,7 @@ fs_visitor::get_timestamp(const fs_builder &bld)
0),
BRW_REGISTER_TYPE_UD));
- fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ fs_reg dst = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
/* We want to read the 3 fields we care about even if it's not enabled in
* the dispatch.
@@ -584,7 +596,7 @@ fs_visitor::emit_shader_time_end()
fs_reg start = shader_start_time;
start.negate = true;
- fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ fs_reg diff = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
diff.set_smear(0);
const fs_builder cbld = ibld.group(1, 0);
@@ -706,7 +718,7 @@ fs_inst::components_read(unsigned i) const
assert(src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
/* First/second FB write color. */
if (i < 2)
- return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud;
+ return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud;
else
return 1;
@@ -717,6 +729,7 @@ fs_inst::components_read(unsigned i) const
case SHADER_OPCODE_TXS_LOGICAL:
case FS_OPCODE_TXB_LOGICAL:
case SHADER_OPCODE_TXF_CMS_LOGICAL:
+ case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
case SHADER_OPCODE_TXF_UMS_LOGICAL:
case SHADER_OPCODE_TXF_MCS_LOGICAL:
case SHADER_OPCODE_LOD_LOGICAL:
@@ -725,13 +738,16 @@ fs_inst::components_read(unsigned i) const
assert(src[8].file == IMM && src[9].file == IMM);
/* Texture coordinates. */
if (i == 0)
- return src[8].fixed_hw_reg.dw1.ud;
+ return src[8].ud;
/* Texture derivatives. */
else if ((i == 2 || i == 3) && opcode == SHADER_OPCODE_TXD_LOGICAL)
- return src[9].fixed_hw_reg.dw1.ud;
+ return src[9].ud;
/* Texture offset. */
else if (i == 7)
return 2;
+ /* MCS */
+ else if (i == 5 && opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL)
+ return 2;
else
return 1;
@@ -740,7 +756,7 @@ fs_inst::components_read(unsigned i) const
assert(src[3].file == IMM);
/* Surface coordinates. */
if (i == 0)
- return src[3].fixed_hw_reg.dw1.ud;
+ return src[3].ud;
/* Surface operation source (ignored for reads). */
else if (i == 1)
return 0;
@@ -753,10 +769,10 @@ fs_inst::components_read(unsigned i) const
src[4].file == IMM);
/* Surface coordinates. */
if (i == 0)
- return src[3].fixed_hw_reg.dw1.ud;
+ return src[3].ud;
/* Surface operation source. */
else if (i == 1)
- return src[4].fixed_hw_reg.dw1.ud;
+ return src[4].ud;
else
return 1;
@@ -764,10 +780,10 @@ fs_inst::components_read(unsigned i) const
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: {
assert(src[3].file == IMM &&
src[4].file == IMM);
- const unsigned op = src[4].fixed_hw_reg.dw1.ud;
+ const unsigned op = src[4].ud;
/* Surface coordinates. */
if (i == 0)
- return src[3].fixed_hw_reg.dw1.ud;
+ return src[3].ud;
/* Surface operation source. */
else if (i == 1 && op == BRW_AOP_CMPWR)
return 2;
@@ -793,6 +809,7 @@ fs_inst::regs_read(int arg) const
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case SHADER_OPCODE_URB_READ_SIMD8:
+ case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
@@ -825,7 +842,7 @@ fs_inst::regs_read(int arg) const
return 1;
default:
- if (is_tex() && arg == 0 && src[0].file == GRF)
+ if (is_tex() && arg == 0 && src[0].file == VGRF)
return mlen;
break;
}
@@ -836,9 +853,10 @@ fs_inst::regs_read(int arg) const
case UNIFORM:
case IMM:
return 1;
- case GRF:
+ case ARF:
+ case FIXED_GRF:
+ case VGRF:
case ATTR:
- case HW_REG:
return DIV_ROUND_UP(components_read(arg) *
src[arg].component_size(exec_size),
REG_SIZE);
@@ -896,6 +914,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_CMS:
+ case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
@@ -938,26 +957,24 @@ fs_reg
fs_visitor::vgrf(const glsl_type *const type)
{
int reg_width = dispatch_width / 8;
- return fs_reg(GRF, alloc.allocate(type_size_scalar(type) * reg_width),
+ return fs_reg(VGRF, alloc.allocate(type_size_scalar(type) * reg_width),
brw_type_for_base_type(type));
}
-/** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int reg)
+fs_reg::fs_reg(enum brw_reg_file file, int nr)
{
init();
this->file = file;
- this->reg = reg;
+ this->nr = nr;
this->type = BRW_REGISTER_TYPE_F;
this->stride = (file == UNIFORM ? 0 : 1);
}
-/** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type)
+fs_reg::fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type)
{
init();
this->file = file;
- this->reg = reg;
+ this->nr = nr;
this->type = type;
this->stride = (file == UNIFORM ? 0 : 1);
}
@@ -1285,9 +1302,9 @@ fs_visitor::emit_sampleid_setup()
fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type));
if (key->compute_sample_id) {
- fs_reg t1(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_D);
+ fs_reg t1(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_D);
t1.set_smear(0);
- fs_reg t2(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_W);
+ fs_reg t2(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_W);
/* The PS will be run in MSDISPMODE_PERSAMPLE. For example with
* 8x multisampling, subspan 0 will represent sample N (where N
@@ -1308,9 +1325,15 @@ fs_visitor::emit_sampleid_setup()
* are sample 1 of subspan 0; the third group is sample 0 of
* subspan 1, and finally sample 1 of subspan 1.
*/
+
+ /* SKL+ has an extra bit for the Starting Sample Pair Index to
+ * accommodate 16x MSAA.
+ */
+ unsigned sspi_mask = devinfo->gen >= 9 ? 0x1c0 : 0xc0;
+
abld.exec_all().group(1, 0)
.AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
- fs_reg(0xc0));
+ fs_reg(sspi_mask));
abld.exec_all().group(1, 0).SHR(t1, t1, fs_reg(5));
/* This works for both SIMD8 and SIMD16 */
@@ -1362,6 +1385,57 @@ fs_visitor::emit_discard_jump()
}
void
+fs_visitor::emit_gs_thread_end()
+{
+ assert(stage == MESA_SHADER_GEOMETRY);
+
+ struct brw_gs_prog_data *gs_prog_data =
+ (struct brw_gs_prog_data *) prog_data;
+
+ if (gs_compile->control_data_header_size_bits > 0) {
+ emit_gs_control_data_bits(this->final_gs_vertex_count);
+ }
+
+ const fs_builder abld = bld.annotate("thread end");
+ fs_inst *inst;
+
+ if (gs_prog_data->static_vertex_count != -1) {
+ foreach_in_list_reverse(fs_inst, prev, &this->instructions) {
+ if (prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8 ||
+ prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED ||
+ prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT ||
+ prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT) {
+ prev->eot = true;
+
+ /* Delete now dead instructions. */
+ foreach_in_list_reverse_safe(exec_node, dead, &this->instructions) {
+ if (dead == prev)
+ break;
+ dead->remove();
+ }
+ return;
+ } else if (prev->is_control_flow() || prev->has_side_effects()) {
+ break;
+ }
+ }
+ fs_reg hdr = abld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ abld.MOV(hdr, fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)));
+ inst = abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, hdr);
+ inst->mlen = 1;
+ } else {
+ fs_reg payload = abld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+ fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2);
+ sources[0] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
+ sources[1] = this->final_gs_vertex_count;
+ abld.LOAD_PAYLOAD(payload, sources, 2, 2);
+ inst = abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
+ inst->mlen = 2;
+ }
+ inst->eot = true;
+ inst->offset = 0;
+}
+
+void
fs_visitor::assign_curb_setup()
{
if (dispatch_width == 8) {
@@ -1384,7 +1458,7 @@ fs_visitor::assign_curb_setup()
foreach_block_and_inst(block, fs_inst, inst, cfg) {
for (unsigned int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == UNIFORM) {
- int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset;
+ int uniform_nr = inst->src[i].nr + inst->src[i].reg_offset;
int constant_nr;
if (uniform_nr >= 0 && uniform_nr < (int) uniforms) {
constant_nr = push_constant_loc[uniform_nr];
@@ -1400,10 +1474,11 @@ fs_visitor::assign_curb_setup()
struct brw_reg brw_reg = brw_vec1_grf(payload.num_regs +
constant_nr / 8,
constant_nr % 8);
+ brw_reg.abs = inst->src[i].abs;
+ brw_reg.negate = inst->src[i].negate;
assert(inst->src[i].stride == 0);
- inst->src[i].file = HW_REG;
- inst->src[i].fixed_hw_reg = byte_offset(
+ inst->src[i] = byte_offset(
retype(brw_reg, inst->src[i].type),
inst->src[i].subreg_offset);
}
@@ -1518,13 +1593,13 @@ fs_visitor::assign_urb_setup()
*/
foreach_block_and_inst(block, fs_inst, inst, cfg) {
if (inst->opcode == FS_OPCODE_LINTERP) {
- assert(inst->src[1].file == HW_REG);
- inst->src[1].fixed_hw_reg.nr += urb_start;
+ assert(inst->src[1].file == FIXED_GRF);
+ inst->src[1].nr += urb_start;
}
if (inst->opcode == FS_OPCODE_CINTERP) {
- assert(inst->src[0].file == HW_REG);
- inst->src[0].fixed_hw_reg.nr += urb_start;
+ assert(inst->src[0].file == FIXED_GRF);
+ inst->src[0].nr += urb_start;
}
}
@@ -1533,6 +1608,30 @@ fs_visitor::assign_urb_setup()
}
void
+fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst)
+{
+ for (int i = 0; i < inst->sources; i++) {
+ if (inst->src[i].file == ATTR) {
+ int grf = payload.num_regs +
+ prog_data->curb_read_length +
+ inst->src[i].nr +
+ inst->src[i].reg_offset;
+
+ unsigned width = inst->src[i].stride == 0 ? 1 : inst->exec_size;
+ struct brw_reg reg =
+ stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
+ inst->src[i].subreg_offset),
+ inst->exec_size * inst->src[i].stride,
+ width, inst->src[i].stride);
+ reg.abs = inst->src[i].abs;
+ reg.negate = inst->src[i].negate;
+
+ inst->src[i] = reg;
+ }
+ }
+}
+
+void
fs_visitor::assign_vs_urb_setup()
{
brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
@@ -1549,24 +1648,44 @@ fs_visitor::assign_vs_urb_setup()
/* Rewrite all ATTR file references to the hw grf that they land in. */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == ATTR) {
- int grf = payload.num_regs +
- prog_data->curb_read_length +
- inst->src[i].reg +
- inst->src[i].reg_offset;
-
- inst->src[i].file = HW_REG;
- inst->src[i].fixed_hw_reg =
- stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
- inst->src[i].subreg_offset),
- inst->exec_size * inst->src[i].stride,
- inst->exec_size, inst->src[i].stride);
- }
+ convert_attr_sources_to_hw_regs(inst);
+ }
+}
+
+void
+fs_visitor::assign_gs_urb_setup()
+{
+ assert(stage == MESA_SHADER_GEOMETRY);
+
+ brw_vue_prog_data *vue_prog_data = (brw_vue_prog_data *) prog_data;
+
+ first_non_payload_grf +=
+ 8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in;
+
+ const unsigned first_icp_handle = payload.num_regs -
+ (vue_prog_data->include_vue_handles ? nir->info.gs.vertices_in : 0);
+
+ foreach_block_and_inst(block, fs_inst, inst, cfg) {
+ /* Lower URB_READ_SIMD8 opcodes into real messages. */
+ if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8) {
+ assert(inst->src[0].file == IMM);
+ inst->src[0] = retype(brw_vec8_grf(first_icp_handle +
+ inst->src[0].ud,
+ 0), BRW_REGISTER_TYPE_UD);
+ /* for now, assume constant - we can do per-slot offsets later */
+ assert(inst->src[1].file == IMM);
+ inst->offset = inst->src[1].ud;
+ inst->src[1] = fs_reg();
+ inst->mlen = 1;
+ inst->base_mrf = -1;
}
+
+ /* Rewrite all ATTR file references to GRFs. */
+ convert_attr_sources_to_hw_regs(inst);
}
}
+
/**
* Split large virtual GRFs into separate components if we can.
*
@@ -1609,30 +1728,30 @@ fs_visitor::split_virtual_grfs()
/* Mark all used registers as fully splittable */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- if (inst->dst.file == GRF) {
- int reg = vgrf_to_reg[inst->dst.reg];
- for (unsigned j = 1; j < this->alloc.sizes[inst->dst.reg]; j++)
+ if (inst->dst.file == VGRF) {
+ int reg = vgrf_to_reg[inst->dst.nr];
+ for (unsigned j = 1; j < this->alloc.sizes[inst->dst.nr]; j++)
split_points[reg + j] = true;
}
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF) {
- int reg = vgrf_to_reg[inst->src[i].reg];
- for (unsigned j = 1; j < this->alloc.sizes[inst->src[i].reg]; j++)
+ if (inst->src[i].file == VGRF) {
+ int reg = vgrf_to_reg[inst->src[i].nr];
+ for (unsigned j = 1; j < this->alloc.sizes[inst->src[i].nr]; j++)
split_points[reg + j] = true;
}
}
}
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- if (inst->dst.file == GRF) {
- int reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset;
+ if (inst->dst.file == VGRF) {
+ int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.reg_offset;
for (int j = 1; j < inst->regs_written; j++)
split_points[reg + j] = false;
}
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF) {
- int reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset;
+ if (inst->src[i].file == VGRF) {
+ int reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].reg_offset;
for (int j = 1; j < inst->regs_read(i); j++)
split_points[reg + j] = false;
}
@@ -1678,16 +1797,16 @@ fs_visitor::split_virtual_grfs()
assert(reg == reg_count);
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- if (inst->dst.file == GRF) {
- reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset;
- inst->dst.reg = new_virtual_grf[reg];
+ if (inst->dst.file == VGRF) {
+ reg = vgrf_to_reg[inst->dst.nr] + inst->dst.reg_offset;
+ inst->dst.nr = new_virtual_grf[reg];
inst->dst.reg_offset = new_reg_offset[reg];
assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
}
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF) {
- reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset;
- inst->src[i].reg = new_virtual_grf[reg];
+ if (inst->src[i].file == VGRF) {
+ reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].reg_offset;
+ inst->src[i].nr = new_virtual_grf[reg];
inst->src[i].reg_offset = new_reg_offset[reg];
assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
}
@@ -1714,12 +1833,12 @@ fs_visitor::compact_virtual_grfs()
/* Mark which virtual GRFs are used. */
foreach_block_and_inst(block, const fs_inst, inst, cfg) {
- if (inst->dst.file == GRF)
- remap_table[inst->dst.reg] = 0;
+ if (inst->dst.file == VGRF)
+ remap_table[inst->dst.nr] = 0;
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF)
- remap_table[inst->src[i].reg] = 0;
+ if (inst->src[i].file == VGRF)
+ remap_table[inst->src[i].nr] = 0;
}
}
@@ -1743,12 +1862,12 @@ fs_visitor::compact_virtual_grfs()
/* Patch all the instructions to use the newly renumbered registers */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- if (inst->dst.file == GRF)
- inst->dst.reg = remap_table[inst->dst.reg];
+ if (inst->dst.file == VGRF)
+ inst->dst.nr = remap_table[inst->dst.nr];
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF)
- inst->src[i].reg = remap_table[inst->src[i].reg];
+ if (inst->src[i].file == VGRF)
+ inst->src[i].nr = remap_table[inst->src[i].nr];
}
}
@@ -1757,9 +1876,9 @@ fs_visitor::compact_virtual_grfs()
* think some random VGRF is delta_xy.
*/
for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
- if (delta_xy[i].file == GRF) {
- if (remap_table[delta_xy[i].reg] != -1) {
- delta_xy[i].reg = remap_table[delta_xy[i].reg];
+ if (delta_xy[i].file == VGRF) {
+ if (remap_table[delta_xy[i].nr] != -1) {
+ delta_xy[i].nr = remap_table[delta_xy[i].nr];
} else {
delta_xy[i].file = BAD_FILE;
}
@@ -1811,7 +1930,7 @@ fs_visitor::assign_constant_locations()
continue;
if (inst->src[i].reladdr) {
- int uniform = inst->src[i].reg;
+ int uniform = inst->src[i].nr;
/* If this array isn't already present in the pull constant buffer,
* add it.
@@ -1823,7 +1942,7 @@ fs_visitor::assign_constant_locations()
}
} else {
/* Mark the one accessed uniform as live */
- int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
+ int constant_nr = inst->src[i].nr + inst->src[i].reg_offset;
if (constant_nr >= 0 && constant_nr < (int) uniforms)
is_live[constant_nr] = true;
}
@@ -1899,7 +2018,7 @@ fs_visitor::demote_pull_constants()
continue;
int pull_index;
- unsigned location = inst->src[i].reg + inst->src[i].reg_offset;
+ unsigned location = inst->src[i].nr + inst->src[i].reg_offset;
if (location >= uniforms) /* Out of bounds access */
pull_index = -1;
else
@@ -1910,7 +2029,7 @@ fs_visitor::demote_pull_constants()
/* Set up the annotation tracking for new generated instructions. */
const fs_builder ibld(this, block, inst);
- fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start);
+ const unsigned index = stage_prog_data->binding_table.pull_constants_start;
fs_reg dst = vgrf(glsl_type::float_type);
assert(inst->src[i].stride == 0);
@@ -1918,7 +2037,7 @@ fs_visitor::demote_pull_constants()
/* Generate a pull load into dst. */
if (inst->src[i].reladdr) {
VARYING_PULL_CONSTANT_LOAD(ibld, dst,
- surf_index,
+ fs_reg(index),
*inst->src[i].reladdr,
pull_index);
inst->src[i].reladdr = NULL;
@@ -1927,13 +2046,14 @@ fs_visitor::demote_pull_constants()
const fs_builder ubld = ibld.exec_all().group(8, 0);
fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- dst, surf_index, offset);
+ dst, fs_reg(index), offset);
inst->src[i].set_smear(pull_index & 3);
}
+ brw_mark_surface_used(prog_data, index);
/* Rewrite the instruction to use the temporary VGRF. */
- inst->src[i].file = GRF;
- inst->src[i].reg = dst.reg;
+ inst->src[i].file = VGRF;
+ inst->src[i].nr = dst.nr;
inst->src[i].reg_offset = 0;
}
}
@@ -1955,8 +2075,7 @@ fs_visitor::opt_algebraic()
if (inst->dst.type != inst->src[0].type)
assert(!"unimplemented: saturate mixed types");
- if (brw_saturate_immediate(inst->dst.type,
- &inst->src[0].fixed_hw_reg)) {
+ if (brw_saturate_immediate(inst->dst.type, &inst->src[0])) {
inst->saturate = false;
progress = true;
}
@@ -1996,7 +2115,7 @@ fs_visitor::opt_algebraic()
if (inst->src[0].file == IMM) {
assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
inst->opcode = BRW_OPCODE_MOV;
- inst->src[0].fixed_hw_reg.dw1.f *= inst->src[1].fixed_hw_reg.dw1.f;
+ inst->src[0].f *= inst->src[1].f;
inst->src[1] = reg_undef;
progress = true;
break;
@@ -2017,7 +2136,7 @@ fs_visitor::opt_algebraic()
if (inst->src[0].file == IMM) {
assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
inst->opcode = BRW_OPCODE_MOV;
- inst->src[0].fixed_hw_reg.dw1.f += inst->src[1].fixed_hw_reg.dw1.f;
+ inst->src[0].f += inst->src[1].f;
inst->src[1] = reg_undef;
progress = true;
break;
@@ -2066,7 +2185,7 @@ fs_visitor::opt_algebraic()
case BRW_CONDITIONAL_L:
switch (inst->src[1].type) {
case BRW_REGISTER_TYPE_F:
- if (inst->src[1].fixed_hw_reg.dw1.f >= 1.0f) {
+ if (inst->src[1].f >= 1.0f) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[1] = reg_undef;
inst->conditional_mod = BRW_CONDITIONAL_NONE;
@@ -2081,7 +2200,7 @@ fs_visitor::opt_algebraic()
case BRW_CONDITIONAL_G:
switch (inst->src[1].type) {
case BRW_REGISTER_TYPE_F:
- if (inst->src[1].fixed_hw_reg.dw1.f <= 0.0f) {
+ if (inst->src[1].f <= 0.0f) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[1] = reg_undef;
inst->conditional_mod = BRW_CONDITIONAL_NONE;
@@ -2118,7 +2237,7 @@ fs_visitor::opt_algebraic()
progress = true;
} else if (inst->src[1].file == IMM && inst->src[2].file == IMM) {
inst->opcode = BRW_OPCODE_ADD;
- inst->src[1].fixed_hw_reg.dw1.f *= inst->src[2].fixed_hw_reg.dw1.f;
+ inst->src[1].f *= inst->src[2].f;
inst->src[2] = reg_undef;
progress = true;
}
@@ -2143,7 +2262,7 @@ fs_visitor::opt_algebraic()
} else if (inst->src[1].file == IMM) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[0] = component(inst->src[0],
- inst->src[1].fixed_hw_reg.dw1.ud);
+ inst->src[1].ud);
inst->sources = 1;
inst->force_writemask_all = true;
progress = true;
@@ -2344,31 +2463,31 @@ fs_visitor::opt_register_renaming()
/* Rewrite instruction sources. */
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF &&
- remap[inst->src[i].reg] != -1 &&
- remap[inst->src[i].reg] != inst->src[i].reg) {
- inst->src[i].reg = remap[inst->src[i].reg];
+ if (inst->src[i].file == VGRF &&
+ remap[inst->src[i].nr] != -1 &&
+ remap[inst->src[i].nr] != inst->src[i].nr) {
+ inst->src[i].nr = remap[inst->src[i].nr];
progress = true;
}
}
- const int dst = inst->dst.reg;
+ const int dst = inst->dst.nr;
if (depth == 0 &&
- inst->dst.file == GRF &&
- alloc.sizes[inst->dst.reg] == inst->exec_size / 8 &&
+ inst->dst.file == VGRF &&
+ alloc.sizes[inst->dst.nr] == inst->exec_size / 8 &&
!inst->is_partial_write()) {
if (remap[dst] == -1) {
remap[dst] = dst;
} else {
remap[dst] = alloc.allocate(inst->exec_size / 8);
- inst->dst.reg = remap[dst];
+ inst->dst.nr = remap[dst];
progress = true;
}
- } else if (inst->dst.file == GRF &&
+ } else if (inst->dst.file == VGRF &&
remap[dst] != -1 &&
remap[dst] != dst) {
- inst->dst.reg = remap[dst];
+ inst->dst.nr = remap[dst];
progress = true;
}
}
@@ -2377,8 +2496,8 @@ fs_visitor::opt_register_renaming()
invalidate_live_intervals();
for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
- if (delta_xy[i].file == GRF && remap[delta_xy[i].reg] != -1) {
- delta_xy[i].reg = remap[delta_xy[i].reg];
+ if (delta_xy[i].file == VGRF && remap[delta_xy[i].nr] != -1) {
+ delta_xy[i].nr = remap[delta_xy[i].nr];
}
}
}
@@ -2445,7 +2564,7 @@ fs_visitor::compute_to_mrf()
if (inst->opcode != BRW_OPCODE_MOV ||
inst->is_partial_write() ||
- inst->dst.file != MRF || inst->src[0].file != GRF ||
+ inst->dst.file != MRF || inst->src[0].file != VGRF ||
inst->dst.type != inst->src[0].type ||
inst->src[0].abs || inst->src[0].negate ||
!inst->src[0].is_contiguous() ||
@@ -2455,9 +2574,9 @@ fs_visitor::compute_to_mrf()
/* Work out which hardware MRF registers are written by this
* instruction.
*/
- int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4;
+ int mrf_low = inst->dst.nr & ~BRW_MRF_COMPR4;
int mrf_high;
- if (inst->dst.reg & BRW_MRF_COMPR4) {
+ if (inst->dst.nr & BRW_MRF_COMPR4) {
mrf_high = mrf_low + 4;
} else if (inst->exec_size == 16) {
mrf_high = mrf_low + 1;
@@ -2468,15 +2587,15 @@ fs_visitor::compute_to_mrf()
/* Can't compute-to-MRF this GRF if someone else was going to
* read it later.
*/
- if (this->virtual_grf_end[inst->src[0].reg] > ip)
+ if (this->virtual_grf_end[inst->src[0].nr] > ip)
continue;
/* Found a move of a GRF to a MRF. Let's see if we can go
* rewrite the thing that made this GRF to write into the MRF.
*/
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
- if (scan_inst->dst.file == GRF &&
- scan_inst->dst.reg == inst->src[0].reg) {
+ if (scan_inst->dst.file == VGRF &&
+ scan_inst->dst.nr == inst->src[0].nr) {
/* Found the last thing to write our reg we want to turn
* into a compute-to-MRF.
*/
@@ -2511,7 +2630,7 @@ fs_visitor::compute_to_mrf()
if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
/* Found the creator of our MRF's source value. */
scan_inst->dst.file = MRF;
- scan_inst->dst.reg = inst->dst.reg;
+ scan_inst->dst.nr = inst->dst.nr;
scan_inst->saturate |= inst->saturate;
inst->remove(block);
progress = true;
@@ -2531,8 +2650,8 @@ fs_visitor::compute_to_mrf()
*/
bool interfered = false;
for (int i = 0; i < scan_inst->sources; i++) {
- if (scan_inst->src[i].file == GRF &&
- scan_inst->src[i].reg == inst->src[0].reg &&
+ if (scan_inst->src[i].file == VGRF &&
+ scan_inst->src[i].nr == inst->src[0].nr &&
scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
interfered = true;
}
@@ -2544,10 +2663,10 @@ fs_visitor::compute_to_mrf()
/* If somebody else writes our MRF here, we can't
* compute-to-MRF before that.
*/
- int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4;
+ int scan_mrf_low = scan_inst->dst.nr & ~BRW_MRF_COMPR4;
int scan_mrf_high;
- if (scan_inst->dst.reg & BRW_MRF_COMPR4) {
+ if (scan_inst->dst.nr & BRW_MRF_COMPR4) {
scan_mrf_high = scan_mrf_low + 4;
} else if (scan_inst->exec_size == 16) {
scan_mrf_high = scan_mrf_low + 1;
@@ -2690,8 +2809,8 @@ fs_visitor::emit_repclear_shader()
/* Now that we have the uniform assigned, go ahead and force it to a vec4. */
if (uniforms == 1) {
- assert(mov->src[0].file == HW_REG);
- mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
+ assert(mov->src[0].file == FIXED_GRF);
+ mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
}
}
@@ -2718,7 +2837,7 @@ fs_visitor::remove_duplicate_mrf_writes()
if (inst->opcode == BRW_OPCODE_MOV &&
inst->dst.file == MRF) {
- fs_inst *prev_inst = last_mrf_move[inst->dst.reg];
+ fs_inst *prev_inst = last_mrf_move[inst->dst.nr];
if (prev_inst && inst->equals(prev_inst)) {
inst->remove(block);
progress = true;
@@ -2728,7 +2847,7 @@ fs_visitor::remove_duplicate_mrf_writes()
/* Clear out the last-write records for MRFs that were overwritten. */
if (inst->dst.file == MRF) {
- last_mrf_move[inst->dst.reg] = NULL;
+ last_mrf_move[inst->dst.nr] = NULL;
}
if (inst->mlen > 0 && inst->base_mrf != -1) {
@@ -2741,10 +2860,10 @@ fs_visitor::remove_duplicate_mrf_writes()
}
/* Clear out any MRF move records whose sources got overwritten. */
- if (inst->dst.file == GRF) {
+ if (inst->dst.file == VGRF) {
for (unsigned int i = 0; i < ARRAY_SIZE(last_mrf_move); i++) {
if (last_mrf_move[i] &&
- last_mrf_move[i]->src[0].reg == inst->dst.reg) {
+ last_mrf_move[i]->src[0].nr == inst->dst.nr) {
last_mrf_move[i] = NULL;
}
}
@@ -2752,9 +2871,9 @@ fs_visitor::remove_duplicate_mrf_writes()
if (inst->opcode == BRW_OPCODE_MOV &&
inst->dst.file == MRF &&
- inst->src[0].file == GRF &&
+ inst->src[0].file == VGRF &&
!inst->is_partial_write()) {
- last_mrf_move[inst->dst.reg] = inst;
+ last_mrf_move[inst->dst.nr] = inst;
}
}
@@ -2770,11 +2889,8 @@ clear_deps_for_inst_src(fs_inst *inst, bool *deps, int first_grf, int grf_len)
/* Clear the flag for registers that actually got read (as expected). */
for (int i = 0; i < inst->sources; i++) {
int grf;
- if (inst->src[i].file == GRF) {
- grf = inst->src[i].reg;
- } else if (inst->src[i].file == HW_REG &&
- inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
- grf = inst->src[i].fixed_hw_reg.nr;
+ if (inst->src[i].file == VGRF || inst->src[i].file == FIXED_GRF) {
+ grf = inst->src[i].nr;
} else {
continue;
}
@@ -2809,7 +2925,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
fs_inst *inst)
{
int write_len = inst->regs_written;
- int first_write_grf = inst->dst.reg;
+ int first_write_grf = inst->dst.nr;
bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -2840,9 +2956,9 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
* instruction but a MOV that might have left us an outstanding
* dependency has more latency than a MOV.
*/
- if (scan_inst->dst.file == GRF) {
+ if (scan_inst->dst.file == VGRF) {
for (int i = 0; i < scan_inst->regs_written; i++) {
- int reg = scan_inst->dst.reg + i;
+ int reg = scan_inst->dst.nr + i;
if (reg >= first_write_grf &&
reg < first_write_grf + write_len &&
@@ -2880,7 +2996,7 @@ void
fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_inst *inst)
{
int write_len = inst->regs_written;
- int first_write_grf = inst->dst.reg;
+ int first_write_grf = inst->dst.nr;
bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -2906,13 +3022,13 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
/* We insert our reads as late as possible since they're reading the
* result of a SEND, which has massive latency.
*/
- if (scan_inst->dst.file == GRF &&
- scan_inst->dst.reg >= first_write_grf &&
- scan_inst->dst.reg < first_write_grf + write_len &&
- needs_dep[scan_inst->dst.reg - first_write_grf]) {
+ if (scan_inst->dst.file == VGRF &&
+ scan_inst->dst.nr >= first_write_grf &&
+ scan_inst->dst.nr < first_write_grf + write_len &&
+ needs_dep[scan_inst->dst.nr - first_write_grf]) {
DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst),
- scan_inst->dst.reg);
- needs_dep[scan_inst->dst.reg - first_write_grf] = false;
+ scan_inst->dst.nr);
+ needs_dep[scan_inst->dst.nr - first_write_grf] = false;
}
/* Continue the loop only if we haven't resolved all the dependencies */
@@ -2939,7 +3055,7 @@ fs_visitor::insert_gen4_send_dependency_workarounds()
*/
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- if (inst->mlen != 0 && inst->dst.file == GRF) {
+ if (inst->mlen != 0 && inst->dst.file == VGRF) {
insert_gen4_pre_send_dependency_workarounds(block, inst);
insert_gen4_post_send_dependency_workarounds(block, inst);
progress = true;
@@ -2980,18 +3096,18 @@ fs_visitor::lower_uniform_pull_constant_loads()
fs_reg const_offset_reg = inst->src[1];
assert(const_offset_reg.file == IMM &&
const_offset_reg.type == BRW_REGISTER_TYPE_UD);
- const_offset_reg.fixed_hw_reg.dw1.ud /= 4;
+ const_offset_reg.ud /= 4;
fs_reg payload, offset;
if (devinfo->gen >= 9) {
/* We have to use a message header on Skylake to get SIMD4x2
* mode. Reserve space for the register.
*/
- offset = payload = fs_reg(GRF, alloc.allocate(2));
+ offset = payload = fs_reg(VGRF, alloc.allocate(2));
offset.reg_offset++;
inst->mlen = 2;
} else {
- offset = payload = fs_reg(GRF, alloc.allocate(1));
+ offset = payload = fs_reg(VGRF, alloc.allocate(1));
inst->mlen = 1;
}
@@ -3038,13 +3154,13 @@ fs_visitor::lower_load_payload()
if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
continue;
- assert(inst->dst.file == MRF || inst->dst.file == GRF);
+ assert(inst->dst.file == MRF || inst->dst.file == VGRF);
assert(inst->saturate == false);
fs_reg dst = inst->dst;
/* Get rid of COMPR4. We'll add it back in if we need it */
if (dst.file == MRF)
- dst.reg = dst.reg & ~BRW_MRF_COMPR4;
+ dst.nr = dst.nr & ~BRW_MRF_COMPR4;
const fs_builder ibld(this, block, inst);
const fs_builder hbld = ibld.exec_all().group(8, 0);
@@ -3058,7 +3174,7 @@ fs_visitor::lower_load_payload()
dst = offset(dst, hbld, 1);
}
- if (inst->dst.file == MRF && (inst->dst.reg & BRW_MRF_COMPR4) &&
+ if (inst->dst.file == MRF && (inst->dst.nr & BRW_MRF_COMPR4) &&
inst->exec_size > 8) {
/* In this case, the payload portion of the LOAD_PAYLOAD isn't
* a straightforward copy. Instead, the result of the
@@ -3082,18 +3198,18 @@ fs_visitor::lower_load_payload()
if (inst->src[i].file != BAD_FILE) {
if (devinfo->has_compr4) {
fs_reg compr4_dst = retype(dst, inst->src[i].type);
- compr4_dst.reg |= BRW_MRF_COMPR4;
+ compr4_dst.nr |= BRW_MRF_COMPR4;
ibld.MOV(compr4_dst, inst->src[i]);
} else {
/* Platform doesn't have COMPR4. We have to fake it */
fs_reg mov_dst = retype(dst, inst->src[i].type);
ibld.half(0).MOV(mov_dst, half(inst->src[i], 0));
- mov_dst.reg += 4;
+ mov_dst.nr += 4;
ibld.half(1).MOV(mov_dst, half(inst->src[i], 1));
}
}
- dst.reg++;
+ dst.nr++;
}
/* The loop above only ever incremented us through the first set
@@ -3101,7 +3217,7 @@ fs_visitor::lower_load_payload()
* actually wrote to the first 8 registers, so we need to take
* that into account now.
*/
- dst.reg += 4;
+ dst.nr += 4;
/* The COMPR4 code took care of the first 4 sources. We'll let
* the regular path handle any remaining sources. Yes, we are
@@ -3149,7 +3265,7 @@ fs_visitor::lower_integer_multiplication()
continue;
if (inst->src[1].file == IMM &&
- inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) {
+ inst->src[1].ud < (1 << 16)) {
/* The MUL instruction isn't commutative. On Gen <= 6, only the low
* 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
* src1 are used.
@@ -3158,7 +3274,7 @@ fs_visitor::lower_integer_multiplication()
* single MUL instruction with that value in the proper location.
*/
if (devinfo->gen < 7) {
- fs_reg imm(GRF, alloc.allocate(dispatch_width / 8),
+ fs_reg imm(VGRF, alloc.allocate(dispatch_width / 8),
inst->dst.type);
ibld.MOV(imm, inst->src[1]);
ibld.MUL(inst->dst, imm, inst->src[0]);
@@ -3213,11 +3329,11 @@ fs_visitor::lower_integer_multiplication()
fs_reg orig_dst = inst->dst;
if (orig_dst.is_null() || orig_dst.file == MRF) {
- inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
+ inst->dst = fs_reg(VGRF, alloc.allocate(dispatch_width / 8),
inst->dst.type);
}
fs_reg low = inst->dst;
- fs_reg high(GRF, alloc.allocate(dispatch_width / 8),
+ fs_reg high(VGRF, alloc.allocate(dispatch_width / 8),
inst->dst.type);
if (devinfo->gen >= 7) {
@@ -3225,8 +3341,8 @@ fs_visitor::lower_integer_multiplication()
fs_reg src1_1_w = inst->src[1];
if (inst->src[1].file == IMM) {
- src1_0_w.fixed_hw_reg.dw1.ud &= 0xffff;
- src1_1_w.fixed_hw_reg.dw1.ud >>= 16;
+ src1_0_w.ud &= 0xffff;
+ src1_1_w.ud >>= 16;
} else {
src1_0_w.type = BRW_REGISTER_TYPE_UW;
if (src1_0_w.stride != 0) {
@@ -3381,7 +3497,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
const fs_reg &src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
const unsigned components =
- inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud;
+ inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud;
/* We can potentially have a message length of up to 15, so we have to set
* base_mrf to either 0 or 1 in order to fit in m0..m15.
@@ -3411,7 +3527,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
}
if (payload.aa_dest_stencil_reg) {
- sources[length] = fs_reg(GRF, bld.shader->alloc.allocate(1));
+ sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1));
bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
.MOV(sources[length],
fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)));
@@ -3419,7 +3535,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
}
if (prog_data->uses_omask) {
- sources[length] = fs_reg(GRF, bld.shader->alloc.allocate(1),
+ sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1),
BRW_REGISTER_TYPE_UD);
/* Hand over gl_SampleMask. Only the lower 16 bits of each channel are
@@ -3485,9 +3601,9 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
fs_inst *load;
if (devinfo->gen >= 7) {
/* Send from the GRF */
- fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F);
+ fs_reg payload = fs_reg(VGRF, -1, BRW_REGISTER_TYPE_F);
load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
- payload.reg = bld.shader->alloc.allocate(load->regs_written);
+ payload.nr = bld.shader->alloc.allocate(load->regs_written);
load->dst = payload;
inst->src[0] = payload;
@@ -3502,7 +3618,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
* will do this for us if we just give it a COMPR4 destination.
*/
if (devinfo->gen < 6 && bld.dispatch_width() == 16)
- load->dst.reg |= BRW_MRF_COMPR4;
+ load->dst.nr |= BRW_MRF_COMPR4;
inst->resize_sources(0);
inst->base_mrf = 1;
@@ -3612,8 +3728,8 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op,
inst->src[0] = reg_undef;
inst->src[1] = sampler;
inst->resize_sources(2);
- inst->base_mrf = msg_begin.reg;
- inst->mlen = msg_end.reg - msg_begin.reg;
+ inst->base_mrf = msg_begin.nr;
+ inst->mlen = msg_end.nr - msg_begin.nr;
inst->header_size = 1;
}
@@ -3637,7 +3753,7 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op,
* go headerless.
*/
header_size = 1;
- message.reg--;
+ message.nr--;
}
for (unsigned i = 0; i < coord_components; i++) {
@@ -3707,8 +3823,8 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op,
inst->src[0] = reg_undef;
inst->src[1] = sampler;
inst->resize_sources(2);
- inst->base_mrf = message.reg;
- inst->mlen = msg_end.reg - message.reg;
+ inst->base_mrf = message.nr;
+ inst->mlen = msg_end.nr - message.nr;
inst->header_size = header_size;
/* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */
@@ -3721,7 +3837,7 @@ is_high_sampler(const struct brw_device_info *devinfo, const fs_reg &sampler)
if (devinfo->gen < 8 && !devinfo->is_haswell)
return false;
- return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
+ return sampler.file != IMM || sampler.ud >= 16;
}
static void
@@ -3844,17 +3960,31 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
coordinate_done = true;
break;
case SHADER_OPCODE_TXF_CMS:
+ case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_UMS:
case SHADER_OPCODE_TXF_MCS:
- if (op == SHADER_OPCODE_TXF_UMS || op == SHADER_OPCODE_TXF_CMS) {
+ if (op == SHADER_OPCODE_TXF_UMS ||
+ op == SHADER_OPCODE_TXF_CMS ||
+ op == SHADER_OPCODE_TXF_CMS_W) {
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index);
length++;
}
- if (op == SHADER_OPCODE_TXF_CMS) {
+ if (op == SHADER_OPCODE_TXF_CMS || op == SHADER_OPCODE_TXF_CMS_W) {
/* Data from the multisample control surface. */
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs);
length++;
+
+ /* On Gen9+ we'll use ld2dms_w instead which has two registers for
+ * the MCS data.
+ */
+ if (op == SHADER_OPCODE_TXF_CMS_W) {
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD),
+ mcs.file == IMM ?
+ mcs :
+ offset(mcs, bld, 1));
+ length++;
+ }
}
/* There is no offsetting for this message; just copy in the integer
@@ -3912,7 +4042,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
else
mlen = length * reg_width;
- const fs_reg src_payload = fs_reg(GRF, bld.shader->alloc.allocate(mlen),
+ const fs_reg src_payload = fs_reg(VGRF, bld.shader->alloc.allocate(mlen),
BRW_REGISTER_TYPE_F);
bld.LOAD_PAYLOAD(src_payload, sources, length, header_size);
@@ -3942,8 +4072,8 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
const fs_reg &sampler = inst->src[6];
const fs_reg &offset_value = inst->src[7];
assert(inst->src[8].file == IMM && inst->src[9].file == IMM);
- const unsigned coord_components = inst->src[8].fixed_hw_reg.dw1.ud;
- const unsigned grad_components = inst->src[9].fixed_hw_reg.dw1.ud;
+ const unsigned coord_components = inst->src[8].ud;
+ const unsigned grad_components = inst->src[9].ud;
if (devinfo->gen >= 7) {
lower_sampler_logical_send_gen7(bld, inst, op, coordinate,
@@ -4068,6 +4198,10 @@ fs_visitor::lower_logical_sends()
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS);
break;
+ case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
+ lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS_W);
+ break;
+
case SHADER_OPCODE_TXF_UMS_LOGICAL:
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_UMS);
break;
@@ -4260,6 +4394,21 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
else
return inst->exec_size;
+ case SHADER_OPCODE_TXF_CMS_W_LOGICAL: {
+ /* This opcode can take up to 6 arguments which means that in some
+ * circumstances it can end up with a message that is too long in SIMD16
+ * mode.
+ */
+ const unsigned coord_components = inst->src[8].ud;
+ /* First three arguments are the sample index and the two arguments for
+ * the MCS data.
+ */
+ if ((coord_components + 3) * 2 > MAX_SAMPLER_MESSAGE_SIZE)
+ return 8;
+ else
+ return inst->exec_size;
+ }
+
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
@@ -4473,51 +4622,48 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
}
switch (inst->dst.file) {
- case GRF:
- fprintf(file, "vgrf%d", inst->dst.reg);
- if (alloc.sizes[inst->dst.reg] != inst->regs_written ||
+ case VGRF:
+ fprintf(file, "vgrf%d", inst->dst.nr);
+ if (alloc.sizes[inst->dst.nr] != inst->regs_written ||
inst->dst.subreg_offset)
fprintf(file, "+%d.%d",
inst->dst.reg_offset, inst->dst.subreg_offset);
break;
+ case FIXED_GRF:
+ fprintf(file, "g%d", inst->dst.nr);
+ break;
case MRF:
- fprintf(file, "m%d", inst->dst.reg);
+ fprintf(file, "m%d", inst->dst.nr);
break;
case BAD_FILE:
fprintf(file, "(null)");
break;
case UNIFORM:
- fprintf(file, "***u%d***", inst->dst.reg + inst->dst.reg_offset);
+ fprintf(file, "***u%d***", inst->dst.nr + inst->dst.reg_offset);
break;
case ATTR:
- fprintf(file, "***attr%d***", inst->dst.reg + inst->dst.reg_offset);
+ fprintf(file, "***attr%d***", inst->dst.nr + inst->dst.reg_offset);
break;
- case HW_REG:
- if (inst->dst.fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) {
- switch (inst->dst.fixed_hw_reg.nr) {
- case BRW_ARF_NULL:
- fprintf(file, "null");
- break;
- case BRW_ARF_ADDRESS:
- fprintf(file, "a0.%d", inst->dst.fixed_hw_reg.subnr);
- break;
- case BRW_ARF_ACCUMULATOR:
- fprintf(file, "acc%d", inst->dst.fixed_hw_reg.subnr);
- break;
- case BRW_ARF_FLAG:
- fprintf(file, "f%d.%d", inst->dst.fixed_hw_reg.nr & 0xf,
- inst->dst.fixed_hw_reg.subnr);
- break;
- default:
- fprintf(file, "arf%d.%d", inst->dst.fixed_hw_reg.nr & 0xf,
- inst->dst.fixed_hw_reg.subnr);
- break;
- }
- } else {
- fprintf(file, "hw_reg%d", inst->dst.fixed_hw_reg.nr);
+ case ARF:
+ switch (inst->dst.nr) {
+ case BRW_ARF_NULL:
+ fprintf(file, "null");
+ break;
+ case BRW_ARF_ADDRESS:
+ fprintf(file, "a0.%d", inst->dst.subnr);
+ break;
+ case BRW_ARF_ACCUMULATOR:
+ fprintf(file, "acc%d", inst->dst.subnr);
+ break;
+ case BRW_ARF_FLAG:
+ fprintf(file, "f%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
+ break;
+ default:
+ fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
+ break;
}
- if (inst->dst.fixed_hw_reg.subnr)
- fprintf(file, "+%d", inst->dst.fixed_hw_reg.subnr);
+ if (inst->dst.subnr)
+ fprintf(file, "+%d", inst->dst.subnr);
break;
case IMM:
unreachable("not reached");
@@ -4530,21 +4676,24 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
if (inst->src[i].abs)
fprintf(file, "|");
switch (inst->src[i].file) {
- case GRF:
- fprintf(file, "vgrf%d", inst->src[i].reg);
- if (alloc.sizes[inst->src[i].reg] != (unsigned)inst->regs_read(i) ||
+ case VGRF:
+ fprintf(file, "vgrf%d", inst->src[i].nr);
+ if (alloc.sizes[inst->src[i].nr] != (unsigned)inst->regs_read(i) ||
inst->src[i].subreg_offset)
fprintf(file, "+%d.%d", inst->src[i].reg_offset,
inst->src[i].subreg_offset);
break;
+ case FIXED_GRF:
+ fprintf(file, "g%d", inst->src[i].nr);
+ break;
case MRF:
- fprintf(file, "***m%d***", inst->src[i].reg);
+ fprintf(file, "***m%d***", inst->src[i].nr);
break;
case ATTR:
- fprintf(file, "attr%d+%d", inst->src[i].reg, inst->src[i].reg_offset);
+ fprintf(file, "attr%d+%d", inst->src[i].nr, inst->src[i].reg_offset);
break;
case UNIFORM:
- fprintf(file, "u%d", inst->src[i].reg + inst->src[i].reg_offset);
+ fprintf(file, "u%d", inst->src[i].nr + inst->src[i].reg_offset);
if (inst->src[i].reladdr) {
fprintf(file, "+reladdr");
} else if (inst->src[i].subreg_offset) {
@@ -4558,60 +4707,48 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
case IMM:
switch (inst->src[i].type) {
case BRW_REGISTER_TYPE_F:
- fprintf(file, "%ff", inst->src[i].fixed_hw_reg.dw1.f);
+ fprintf(file, "%ff", inst->src[i].f);
break;
case BRW_REGISTER_TYPE_W:
case BRW_REGISTER_TYPE_D:
- fprintf(file, "%dd", inst->src[i].fixed_hw_reg.dw1.d);
+ fprintf(file, "%dd", inst->src[i].d);
break;
case BRW_REGISTER_TYPE_UW:
case BRW_REGISTER_TYPE_UD:
- fprintf(file, "%uu", inst->src[i].fixed_hw_reg.dw1.ud);
+ fprintf(file, "%uu", inst->src[i].ud);
break;
case BRW_REGISTER_TYPE_VF:
fprintf(file, "[%-gF, %-gF, %-gF, %-gF]",
- brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 0) & 0xff),
- brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 8) & 0xff),
- brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 16) & 0xff),
- brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 24) & 0xff));
+ brw_vf_to_float((inst->src[i].ud >> 0) & 0xff),
+ brw_vf_to_float((inst->src[i].ud >> 8) & 0xff),
+ brw_vf_to_float((inst->src[i].ud >> 16) & 0xff),
+ brw_vf_to_float((inst->src[i].ud >> 24) & 0xff));
break;
default:
fprintf(file, "???");
break;
}
break;
- case HW_REG:
- if (inst->src[i].fixed_hw_reg.negate)
- fprintf(file, "-");
- if (inst->src[i].fixed_hw_reg.abs)
- fprintf(file, "|");
- if (inst->src[i].fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) {
- switch (inst->src[i].fixed_hw_reg.nr) {
- case BRW_ARF_NULL:
- fprintf(file, "null");
- break;
- case BRW_ARF_ADDRESS:
- fprintf(file, "a0.%d", inst->src[i].fixed_hw_reg.subnr);
- break;
- case BRW_ARF_ACCUMULATOR:
- fprintf(file, "acc%d", inst->src[i].fixed_hw_reg.subnr);
- break;
- case BRW_ARF_FLAG:
- fprintf(file, "f%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf,
- inst->src[i].fixed_hw_reg.subnr);
- break;
- default:
- fprintf(file, "arf%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf,
- inst->src[i].fixed_hw_reg.subnr);
- break;
- }
- } else {
- fprintf(file, "hw_reg%d", inst->src[i].fixed_hw_reg.nr);
+ case ARF:
+ switch (inst->src[i].nr) {
+ case BRW_ARF_NULL:
+ fprintf(file, "null");
+ break;
+ case BRW_ARF_ADDRESS:
+ fprintf(file, "a0.%d", inst->src[i].subnr);
+ break;
+ case BRW_ARF_ACCUMULATOR:
+ fprintf(file, "acc%d", inst->src[i].subnr);
+ break;
+ case BRW_ARF_FLAG:
+ fprintf(file, "f%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
+ break;
+ default:
+ fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
+ break;
}
- if (inst->src[i].fixed_hw_reg.subnr)
- fprintf(file, "+%d", inst->src[i].fixed_hw_reg.subnr);
- if (inst->src[i].fixed_hw_reg.abs)
- fprintf(file, "|");
+ if (inst->src[i].subnr)
+ fprintf(file, "+%d", inst->src[i].subnr);
break;
}
if (inst->src[i].abs)
@@ -4627,6 +4764,9 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
fprintf(file, " ");
+ if (inst->force_writemask_all)
+ fprintf(file, "NoMask ");
+
if (dispatch_width == 16 && inst->exec_size == 8) {
if (inst->force_sechalf)
fprintf(file, "2ndhalf ");
@@ -4779,6 +4919,45 @@ fs_visitor::setup_vs_payload()
*
*/
void
+fs_visitor::setup_gs_payload()
+{ /* Sizes the thread payload for a geometry shader: counts the fixed
+ * payload registers in payload.num_regs and, if the per-vertex inputs
+ * would exceed the push budget declared below, enables VUE handles and
+ * shrinks the URB read length so the remaining inputs are pulled.
+ */
+ assert(stage == MESA_SHADER_GEOMETRY);
+
+ struct brw_gs_prog_data *gs_prog_data =
+ (struct brw_gs_prog_data *) prog_data;
+ struct brw_vue_prog_data *vue_prog_data =
+ (struct brw_vue_prog_data *) prog_data; /* second typed view of the same prog_data pointer */
+
+ /* R0: thread header, R1: output URB handles */
+ payload.num_regs = 2;
+
+ if (gs_prog_data->include_primitive_id) {
+ /* R2: Primitive ID 0..7 */
+ payload.num_regs++;
+ }
+
+ /* Use a maximum of 32 registers for push-model inputs. */
+ const unsigned max_push_components = 32;
+
+ /* If pushing our inputs would take too many registers, reduce the URB read
+ * length (which is in HWords, or 8 registers), and resort to pulling.
+ *
+ * Note that the GS reads <URB Read Length> HWords for every vertex - so we
+ * have to multiply by VerticesIn to obtain the total storage requirement.
+ */
+ if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in >
+ max_push_components) {
+ gs_prog_data->base.include_vue_handles = true;
+
+ /* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
+ payload.num_regs += nir->info.gs.vertices_in;
+ /* Split the push budget evenly across the input vertices, round that
+ * share down to a multiple of 8, then divide by 8 to express it in
+ * HWords. NOTE(review): with vertices_in > 4 the share is below 8, so
+ * this presumably yields 0 (nothing pushed) -- confirm against the
+ * definition of ROUND_DOWN_TO.
+ */
+ vue_prog_data->urb_read_length =
+ ROUND_DOWN_TO(max_push_components / nir->info.gs.vertices_in, 8) / 8;
+ }
+}
+
+void
fs_visitor::setup_cs_payload()
{
assert(devinfo->gen >= 7);
@@ -4925,7 +5104,7 @@ fs_visitor::fixup_3src_null_dest()
{
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
if (inst->is_3src() && inst->dst.is_null()) {
- inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
+ inst->dst = fs_reg(VGRF, alloc.allocate(dispatch_width / 8),
inst->dst.type);
}
}
@@ -5035,6 +5214,55 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes)
}
bool
+fs_visitor::run_gs()
+{ /* Runs the geometry-shader compile steps in order: setup_gs_payload(),
+ * control-data register initialization, emit_nir_code(),
+ * emit_gs_thread_end(), then calculate_cfg(), optimize(), CURB/URB
+ * assignment and allocate_registers(). Returns false when `failed` is
+ * set, checked once after emission and once after register allocation.
+ */
+ assert(stage == MESA_SHADER_GEOMETRY);
+
+ setup_gs_payload();
+
+ this->final_gs_vertex_count = vgrf(glsl_type::uint_type); /* only allocated here; NOTE(review): presumably written during NIR emission -- confirm */
+
+ if (gs_compile->control_data_header_size_bits > 0) {
+ /* Create a VGRF to store accumulated control data bits. */
+ this->control_data_bits = vgrf(glsl_type::uint_type);
+
+ /* If we're outputting more than 32 control data bits, then EmitVertex()
+ * will set control_data_bits to 0 after emitting the first vertex.
+ * Otherwise, we need to initialize it to 0 here.
+ */
+ if (gs_compile->control_data_header_size_bits <= 32) {
+ const fs_builder abld = bld.annotate("initialize control data bits");
+ abld.MOV(this->control_data_bits, fs_reg(0u));
+ }
+ }
+
+ if (shader_time_index >= 0) /* NOTE(review): a negative index presumably means shader-time instrumentation is off -- confirm */
+ emit_shader_time_begin();
+
+ emit_nir_code();
+
+ emit_gs_thread_end();
+
+ if (shader_time_index >= 0)
+ emit_shader_time_end();
+
+ if (failed) /* bail out before building the CFG, optimizing or allocating registers */
+ return false;
+
+ calculate_cfg();
+
+ optimize();
+
+ assign_curb_setup();
+ assign_gs_urb_setup();
+
+ fixup_3src_null_dest();
+ allocate_registers();
+
+ return !failed; /* `failed` is re-checked because the steps after the first check run unconditionally */
+}
+
+bool
fs_visitor::run_fs(bool do_rep_send)
{
brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 8058b344b7a..f40e58b8ca0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -70,9 +70,10 @@ offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta)
switch (reg.file) {
case BAD_FILE:
break;
- case GRF:
+ case ARF:
+ case FIXED_GRF:
case MRF:
- case HW_REG:
+ case VGRF:
case ATTR:
return byte_offset(reg,
delta * reg.component_size(bld.dispatch_width()));
@@ -105,7 +106,8 @@ public:
void *mem_ctx,
struct brw_gs_compile *gs_compile,
struct brw_gs_prog_data *prog_data,
- const nir_shader *shader);
+ const nir_shader *shader,
+ int shader_time_index);
void init();
~fs_visitor();
@@ -131,18 +133,22 @@ public:
bool run_fs(bool do_rep_send);
bool run_vs(gl_clip_plane *clip_planes);
+ bool run_gs();
bool run_cs();
void optimize();
void allocate_registers();
void setup_payload_gen4();
void setup_payload_gen6();
void setup_vs_payload();
+ void setup_gs_payload();
void setup_cs_payload();
void fixup_3src_null_dest();
void assign_curb_setup();
void calculate_urb_setup();
void assign_urb_setup();
+ void convert_attr_sources_to_hw_regs(fs_inst *inst);
void assign_vs_urb_setup();
+ void assign_gs_urb_setup();
bool assign_regs(bool allow_spilling);
void assign_regs_trivial();
void calculate_payload_ranges(int payload_node_count,
@@ -258,6 +264,14 @@ public:
nir_load_const_instr *instr);
void nir_emit_undef(const brw::fs_builder &bld,
nir_ssa_undef_instr *instr);
+ void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
+ nir_intrinsic_instr *instr);
+ void nir_emit_gs_intrinsic(const brw::fs_builder &bld,
+ nir_intrinsic_instr *instr);
+ void nir_emit_fs_intrinsic(const brw::fs_builder &bld,
+ nir_intrinsic_instr *instr);
+ void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
+ nir_intrinsic_instr *instr);
void nir_emit_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
@@ -280,7 +294,16 @@ public:
fs_reg color1, fs_reg color2,
fs_reg src0_alpha, unsigned components);
void emit_fb_writes();
- void emit_urb_writes();
+ void emit_urb_writes(const fs_reg &gs_vertex_count = fs_reg());
+ void set_gs_stream_control_data_bits(const fs_reg &vertex_count,
+ unsigned stream_id);
+ void emit_gs_control_data_bits(const fs_reg &vertex_count);
+ void emit_gs_end_primitive(const nir_src &vertex_count_nir_src);
+ void emit_gs_vertex(const nir_src &vertex_count_nir_src,
+ unsigned stream_id);
+ void emit_gs_thread_end();
+ void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
+ unsigned offset, unsigned num_components);
void emit_cs_terminate();
fs_reg *emit_cs_local_invocation_id_setup();
fs_reg *emit_cs_work_group_id_setup();
@@ -388,6 +411,8 @@ public:
fs_reg delta_xy[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
fs_reg shader_start_time;
fs_reg userplane[MAX_CLIP_PLANES];
+ fs_reg final_gs_vertex_count;
+ fs_reg control_data_bits;
unsigned grf_used;
bool spilled_any_registers;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index f121f3463d3..22b2f22073f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -179,7 +179,7 @@ namespace brw {
assert(dispatch_width() <= 32);
if (n > 0)
- return dst_reg(GRF, shader->alloc.allocate(
+ return dst_reg(VGRF, shader->alloc.allocate(
DIV_ROUND_UP(n * type_sz(type) * dispatch_width(),
REG_SIZE)),
type);
@@ -224,12 +224,13 @@ namespace brw {
src_reg
sample_mask_reg() const
{
- const bool uses_kill =
- (shader->stage == MESA_SHADER_FRAGMENT &&
- ((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill);
- return (shader->stage != MESA_SHADER_FRAGMENT ? src_reg(0xffff) :
- uses_kill ? brw_flag_reg(0, 1) :
- retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD));
+ if (shader->stage != MESA_SHADER_FRAGMENT) { /* every non-fragment stage gets the constant 0xffff */
+ return src_reg(0xffff);
+ } else if (((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill) { /* the cast is only evaluated once the stage is known to be FRAGMENT */
+ return brw_flag_reg(0, 1); /* NOTE(review): presumably flag register f0.1 -- confirm against brw_flag_reg() */
+ } else {
+ return retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD); /* NOTE(review): presumably GRF 1, subregister 7, read as UD -- confirm against brw_vec1_grf() */
+ }
}
/**
@@ -595,7 +596,7 @@ namespace brw {
src_reg
fix_3src_operand(const src_reg &src) const
{
- if (src.file == GRF || src.file == UNIFORM || src.stride > 1) {
+ if (src.file == VGRF || src.file == UNIFORM || src.stride > 1) {
return src;
} else {
dst_reg expanded = vgrf(src.type);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
index 883e8d2a49f..8fdc959f992 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
@@ -62,7 +62,7 @@ opt_cmod_propagation_local(bblock_t *block)
inst->opcode != BRW_OPCODE_MOV) ||
inst->predicate != BRW_PREDICATE_NONE ||
!inst->dst.is_null() ||
- inst->src[0].file != GRF ||
+ inst->src[0].file != VGRF ||
inst->src[0].abs)
continue;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
index c182232285e..0c115f50748 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
@@ -121,7 +121,7 @@ struct imm {
* constant value.
*/
uint8_t subreg_offset;
- uint16_t reg;
+ uint16_t nr;
/** The number of coissuable instructions using this immediate. */
uint16_t uses_by_coissue;
@@ -219,7 +219,7 @@ fs_visitor::opt_combine_constants()
inst->src[i].type != BRW_REGISTER_TYPE_F)
continue;
- float val = fabsf(inst->src[i].fixed_hw_reg.dw1.f);
+ float val = fabsf(inst->src[i].f);
struct imm *imm = find_imm(&table, val);
if (imm) {
@@ -268,7 +268,7 @@ fs_visitor::opt_combine_constants()
/* Insert MOVs to load the constant values into GRFs. */
- fs_reg reg(GRF, alloc.allocate(dispatch_width / 8));
+ fs_reg reg(VGRF, alloc.allocate(dispatch_width / 8));
reg.stride = 0;
for (int i = 0; i < table.len; i++) {
struct imm *imm = &table.imm[i];
@@ -280,12 +280,12 @@ fs_visitor::opt_combine_constants()
const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, 0);
ibld.MOV(reg, fs_reg(imm->val));
- imm->reg = reg.reg;
+ imm->nr = reg.nr;
imm->subreg_offset = reg.subreg_offset;
reg.subreg_offset += sizeof(float);
if ((unsigned)reg.subreg_offset == dispatch_width * sizeof(float)) {
- reg.reg = alloc.allocate(dispatch_width / 8);
+ reg.nr = alloc.allocate(dispatch_width / 8);
reg.subreg_offset = 0;
}
}
@@ -295,13 +295,12 @@ fs_visitor::opt_combine_constants()
for (int i = 0; i < table.len; i++) {
foreach_list_typed(reg_link, link, link, table.imm[i].uses) {
fs_reg *reg = link->reg;
- reg->file = GRF;
- reg->reg = table.imm[i].reg;
+ reg->file = VGRF;
+ reg->nr = table.imm[i].nr;
reg->subreg_offset = table.imm[i].subreg_offset;
reg->stride = 0;
- reg->negate = signbit(reg->fixed_hw_reg.dw1.f) !=
- signbit(table.imm[i].val);
- assert(fabsf(reg->fixed_hw_reg.dw1.f) == table.imm[i].val);
+ reg->negate = signbit(reg->f) != signbit(table.imm[i].val);
+ assert(fabsf(reg->f) == table.imm[i].val);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 26204827156..426ea57d8f9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -154,7 +154,7 @@ fs_copy_prop_dataflow::setup_initial_values()
/* Initialize the COPY and KILL sets. */
foreach_block (block, cfg) {
foreach_inst_in_block(fs_inst, inst, block) {
- if (inst->dst.file != GRF)
+ if (inst->dst.file != VGRF)
continue;
/* Mark ACP entries which are killed by this instruction. */
@@ -278,20 +278,20 @@ is_logic_op(enum opcode opcode)
bool
fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
{
- if (inst->src[arg].file != GRF)
+ if (inst->src[arg].file != VGRF)
return false;
if (entry->src.file == IMM)
return false;
- assert(entry->src.file == GRF || entry->src.file == UNIFORM ||
+ assert(entry->src.file == VGRF || entry->src.file == UNIFORM ||
entry->src.file == ATTR);
if (entry->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD)
return false;
- assert(entry->dst.file == GRF);
- if (inst->src[arg].reg != entry->dst.reg)
+ assert(entry->dst.file == VGRF);
+ if (inst->src[arg].nr != entry->dst.nr)
return false;
/* Bail if inst is reading a range that isn't contained in the range
@@ -369,8 +369,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
switch(inst->opcode) {
case BRW_OPCODE_SEL:
if (inst->src[1].file != IMM ||
- inst->src[1].fixed_hw_reg.dw1.f < 0.0 ||
- inst->src[1].fixed_hw_reg.dw1.f > 1.0) {
+ inst->src[1].f < 0.0 ||
+ inst->src[1].f > 1.0) {
return false;
}
break;
@@ -380,19 +380,20 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
}
inst->src[arg].file = entry->src.file;
- inst->src[arg].reg = entry->src.reg;
+ inst->src[arg].nr = entry->src.nr;
inst->src[arg].stride *= entry->src.stride;
inst->saturate = inst->saturate || entry->saturate;
switch (entry->src.file) {
case UNIFORM:
case BAD_FILE:
- case HW_REG:
+ case ARF:
+ case FIXED_GRF:
inst->src[arg].reg_offset = entry->src.reg_offset;
inst->src[arg].subreg_offset = entry->src.subreg_offset;
break;
case ATTR:
- case GRF:
+ case VGRF:
{
/* In this case, we'll just leave the width alone. The source
* register could have different widths depending on how it is
@@ -456,11 +457,11 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
return false;
for (int i = inst->sources - 1; i >= 0; i--) {
- if (inst->src[i].file != GRF)
+ if (inst->src[i].file != VGRF)
continue;
- assert(entry->dst.file == GRF);
- if (inst->src[i].reg != entry->dst.reg)
+ assert(entry->dst.file == VGRF);
+ if (inst->src[i].nr != entry->dst.nr)
continue;
/* Bail if inst is reading a range that isn't contained in the range
@@ -477,14 +478,14 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
if (inst->src[i].abs) {
if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
- !brw_abs_immediate(val.type, &val.fixed_hw_reg)) {
+ !brw_abs_immediate(val.type, &val)) {
continue;
}
}
if (inst->src[i].negate) {
if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
- !brw_negate_immediate(val.type, &val.fixed_hw_reg)) {
+ !brw_negate_immediate(val.type, &val)) {
continue;
}
}
@@ -605,10 +606,10 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
* anyway.
*/
assert(i == 0);
- if (inst->src[0].fixed_hw_reg.dw1.f != 0.0f) {
+ if (inst->src[0].f != 0.0f) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[0] = val;
- inst->src[0].fixed_hw_reg.dw1.f = 1.0f / inst->src[0].fixed_hw_reg.dw1.f;
+ inst->src[0].f = 1.0f / inst->src[0].f;
progress = true;
}
break;
@@ -652,9 +653,9 @@ static bool
can_propagate_from(fs_inst *inst)
{
return (inst->opcode == BRW_OPCODE_MOV &&
- inst->dst.file == GRF &&
- ((inst->src[0].file == GRF &&
- (inst->src[0].reg != inst->dst.reg ||
+ inst->dst.file == VGRF &&
+ ((inst->src[0].file == VGRF &&
+ (inst->src[0].nr != inst->dst.nr ||
inst->src[0].reg_offset != inst->dst.reg_offset)) ||
inst->src[0].file == ATTR ||
inst->src[0].file == UNIFORM ||
@@ -675,10 +676,10 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
foreach_inst_in_block(fs_inst, inst, block) {
/* Try propagating into this instruction. */
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file != GRF)
+ if (inst->src[i].file != VGRF)
continue;
- foreach_in_list(acp_entry, entry, &acp[inst->src[i].reg % ACP_HASH_SIZE]) {
+ foreach_in_list(acp_entry, entry, &acp[inst->src[i].nr % ACP_HASH_SIZE]) {
if (try_constant_propagate(inst, entry))
progress = true;
@@ -688,8 +689,8 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
}
/* kill the destination from the ACP */
- if (inst->dst.file == GRF) {
- foreach_in_list_safe(acp_entry, entry, &acp[inst->dst.reg % ACP_HASH_SIZE]) {
+ if (inst->dst.file == VGRF) {
+ foreach_in_list_safe(acp_entry, entry, &acp[inst->dst.nr % ACP_HASH_SIZE]) {
if (inst->overwrites_reg(entry->dst)) {
entry->remove();
}
@@ -716,14 +717,14 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
entry->regs_written = inst->regs_written;
entry->opcode = inst->opcode;
entry->saturate = inst->saturate;
- acp[entry->dst.reg % ACP_HASH_SIZE].push_tail(entry);
+ acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);
} else if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
- inst->dst.file == GRF) {
+ inst->dst.file == VGRF) {
int offset = 0;
for (int i = 0; i < inst->sources; i++) {
int effective_width = i < inst->header_size ? 8 : inst->exec_size;
int regs_written = effective_width / 8;
- if (inst->src[i].file == GRF) {
+ if (inst->src[i].file == VGRF) {
acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
entry->dst = inst->dst;
entry->dst.reg_offset = offset;
@@ -731,7 +732,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
entry->regs_written = regs_written;
entry->opcode = inst->opcode;
if (!entry->dst.equals(inst->src[i])) {
- acp[entry->dst.reg % ACP_HASH_SIZE].push_tail(entry);
+ acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);
} else {
ralloc_free(entry);
}
@@ -774,7 +775,7 @@ fs_visitor::opt_copy_propagate()
for (int i = 0; i < dataflow.num_acp; i++) {
if (BITSET_TEST(dataflow.bd[block->num].livein, i)) {
struct acp_entry *entry = dataflow.acp[i];
- in_acp[entry->dst.reg % ACP_HASH_SIZE].push_tail(entry);
+ in_acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 3a28c8d591d..8c67caff6e0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -110,20 +110,20 @@ operands_match(const fs_inst *a, const fs_inst *b, bool *negate)
(xs[2].equals(ys[1]) && xs[1].equals(ys[2])));
} else if (a->opcode == BRW_OPCODE_MUL && a->dst.type == BRW_REGISTER_TYPE_F) {
bool xs0_negate = xs[0].negate;
- bool xs1_negate = xs[1].file == IMM ? xs[1].fixed_hw_reg.dw1.f < 0.0f
+ bool xs1_negate = xs[1].file == IMM ? xs[1].f < 0.0f
: xs[1].negate;
bool ys0_negate = ys[0].negate;
- bool ys1_negate = ys[1].file == IMM ? ys[1].fixed_hw_reg.dw1.f < 0.0f
+ bool ys1_negate = ys[1].file == IMM ? ys[1].f < 0.0f
: ys[1].negate;
- float xs1_imm = xs[1].fixed_hw_reg.dw1.f;
- float ys1_imm = ys[1].fixed_hw_reg.dw1.f;
+ float xs1_imm = xs[1].f;
+ float ys1_imm = ys[1].f;
xs[0].negate = false;
xs[1].negate = false;
ys[0].negate = false;
ys[1].negate = false;
- xs[1].fixed_hw_reg.dw1.f = fabsf(xs[1].fixed_hw_reg.dw1.f);
- ys[1].fixed_hw_reg.dw1.f = fabsf(ys[1].fixed_hw_reg.dw1.f);
+ xs[1].f = fabsf(xs[1].f);
+ ys[1].f = fabsf(ys[1].f);
bool ret = (xs[0].equals(ys[0]) && xs[1].equals(ys[1])) ||
(xs[1].equals(ys[0]) && xs[0].equals(ys[1]));
@@ -132,8 +132,8 @@ operands_match(const fs_inst *a, const fs_inst *b, bool *negate)
xs[1].negate = xs[1].file == IMM ? false : xs1_negate;
ys[0].negate = ys0_negate;
ys[1].negate = ys[1].file == IMM ? false : ys1_negate;
- xs[1].fixed_hw_reg.dw1.f = xs1_imm;
- ys[1].fixed_hw_reg.dw1.f = ys1_imm;
+ xs[1].f = xs1_imm;
+ ys[1].f = ys1_imm;
*negate = (xs0_negate != xs1_negate) != (ys0_negate != ys1_negate);
return ret;
@@ -196,7 +196,7 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
header_size = 0;
}
- assert(src.file == GRF);
+ assert(src.file == VGRF);
payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources);
for (int i = 0; i < header_size; i++) {
payload[i] = src;
@@ -226,7 +226,8 @@ fs_visitor::opt_cse_local(bblock_t *block)
foreach_inst_in_block(fs_inst, inst, block) {
/* Skip some cases. */
if (is_expression(this, inst) && !inst->is_partial_write() &&
- (inst->dst.file != HW_REG || inst->dst.is_null()))
+ ((inst->dst.file != ARF && inst->dst.file != FIXED_GRF) ||
+ inst->dst.is_null()))
{
bool found = false;
bool negate = false;
@@ -262,7 +263,7 @@ fs_visitor::opt_cse_local(bblock_t *block)
.at(block, entry->generator->next);
int written = entry->generator->regs_written;
- entry->tmp = fs_reg(GRF, alloc.allocate(written),
+ entry->tmp = fs_reg(VGRF, alloc.allocate(written),
entry->generator->dst.type);
create_copy_instr(ibld, entry->generator, entry->tmp, false);
@@ -320,7 +321,7 @@ fs_visitor::opt_cse_local(bblock_t *block)
/* Kill any AEB entries using registers that don't get reused any
* more -- a sure sign they'll fail operands_match().
*/
- if (src_reg->file == GRF && virtual_grf_end[src_reg->reg] < ip) {
+ if (src_reg->file == VGRF && virtual_grf_end[src_reg->nr] < ip) {
entry->remove();
ralloc_free(entry);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
index 4b5548a9dc5..a50cf6f24f1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
@@ -52,7 +52,7 @@ fs_visitor::dead_code_eliminate()
sizeof(BITSET_WORD));
foreach_inst_in_block_reverse(fs_inst, inst, block) {
- if (inst->dst.file == GRF && !inst->has_side_effects()) {
+ if (inst->dst.file == VGRF && !inst->has_side_effects()) {
bool result_live = false;
if (inst->regs_written == 1) {
@@ -96,7 +96,7 @@ fs_visitor::dead_code_eliminate()
continue;
}
- if (inst->dst.file == GRF) {
+ if (inst->dst.file == VGRF) {
if (!inst->is_partial_write()) {
int var = live_intervals->var_from_reg(inst->dst);
for (int i = 0; i < inst->regs_written; i++) {
@@ -105,12 +105,12 @@ fs_visitor::dead_code_eliminate()
}
}
- if (inst->writes_flag()) {
+ if (inst->writes_flag() && !inst->predicate) {
BITSET_CLEAR(flag_live, inst->flag_subreg);
}
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF) {
+ if (inst->src[i].file == VGRF) {
int var = live_intervals->var_from_reg(inst->src[i]);
for (int j = 0; j < inst->regs_read(i); j++) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index e207a77fdc1..139cda3ca59 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -33,22 +33,25 @@
#include "brw_fs.h"
#include "brw_cfg.h"
-static uint32_t brw_file_from_reg(fs_reg *reg)
+static enum brw_reg_file
+brw_file_from_reg(fs_reg *reg) /* Translates reg->file into the matching brw_reg_file value. */
{
switch (reg->file) {
- case GRF:
+ case ARF:
+ return BRW_ARCHITECTURE_REGISTER_FILE;
+ case FIXED_GRF: /* FIXED_GRF and VGRF both map to BRW_GENERAL_REGISTER_FILE */
+ case VGRF:
return BRW_GENERAL_REGISTER_FILE;
case MRF:
return BRW_MESSAGE_REGISTER_FILE;
case IMM:
return BRW_IMMEDIATE_VALUE;
case BAD_FILE:
- case HW_REG:
case ATTR:
case UNIFORM:
unreachable("not reached");
}
- return 0;
+ return BRW_ARCHITECTURE_REGISTER_FILE; /* fallback after the switch: every case listed above either returns or hits unreachable() first */
}
static struct brw_reg
@@ -58,13 +61,13 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
switch (reg->file) {
case MRF:
- assert((reg->reg & ~(1 << 7)) < BRW_MAX_MRF(gen));
+ assert((reg->nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(gen));
/* Fallthrough */
- case GRF:
+ case VGRF:
if (reg->stride == 0) {
- brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, 0);
+ brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0);
} else if (inst->exec_size < 8) {
- brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
+ brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->nr, 0);
brw_reg = stride(brw_reg, inst->exec_size * reg->stride,
inst->exec_size, reg->stride);
} else {
@@ -77,12 +80,14 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
* So, for registers with width > 8, we have to use a width of 8
* and trust the compression state to sort out the exec size.
*/
- brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
+ brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->nr, 0);
brw_reg = stride(brw_reg, 8 * reg->stride, 8, reg->stride);
}
brw_reg = retype(brw_reg, reg->type);
brw_reg = byte_offset(brw_reg, reg->subreg_offset);
+ brw_reg.abs = reg->abs;
+ brw_reg.negate = reg->negate;
break;
case IMM:
assert(reg->stride == ((reg->type == BRW_REGISTER_TYPE_V ||
@@ -91,30 +96,33 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
switch (reg->type) {
case BRW_REGISTER_TYPE_F:
- brw_reg = brw_imm_f(reg->fixed_hw_reg.dw1.f);
+ brw_reg = brw_imm_f(reg->f);
break;
case BRW_REGISTER_TYPE_D:
- brw_reg = brw_imm_d(reg->fixed_hw_reg.dw1.d);
+ brw_reg = brw_imm_d(reg->d);
break;
case BRW_REGISTER_TYPE_UD:
- brw_reg = brw_imm_ud(reg->fixed_hw_reg.dw1.ud);
+ brw_reg = brw_imm_ud(reg->ud);
break;
case BRW_REGISTER_TYPE_W:
- brw_reg = brw_imm_w(reg->fixed_hw_reg.dw1.d);
+ brw_reg = brw_imm_w(reg->d);
break;
case BRW_REGISTER_TYPE_UW:
- brw_reg = brw_imm_uw(reg->fixed_hw_reg.dw1.ud);
+ brw_reg = brw_imm_uw(reg->ud);
break;
case BRW_REGISTER_TYPE_VF:
- brw_reg = brw_imm_vf(reg->fixed_hw_reg.dw1.ud);
+ brw_reg = brw_imm_vf(reg->ud);
+ break;
+ case BRW_REGISTER_TYPE_V:
+ brw_reg = brw_imm_v(reg->ud);
break;
default:
unreachable("not reached");
}
break;
- case HW_REG:
- assert(reg->type == reg->fixed_hw_reg.type);
- brw_reg = reg->fixed_hw_reg;
+ case ARF:
+ case FIXED_GRF:
+ brw_reg = *static_cast<struct brw_reg *>(reg);
break;
case BAD_FILE:
/* Probably unused. */
@@ -124,10 +132,6 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
case UNIFORM:
unreachable("not reached");
}
- if (reg->abs)
- brw_reg = brw_abs(brw_reg);
- if (reg->negate)
- brw_reg = negate(brw_reg);
return brw_reg;
}
@@ -383,6 +387,9 @@ fs_generator::generate_urb_read(fs_inst *inst,
brw_inst_set_sfid(p->devinfo, send, BRW_SFID_URB);
brw_inst_set_urb_opcode(p->devinfo, send, GEN8_URB_OPCODE_SIMD8_READ);
+ if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT)
+ brw_inst_set_urb_per_slot_offset(p->devinfo, send, true);
+
brw_inst_set_mlen(p->devinfo, send, inst->mlen);
brw_inst_set_rlen(p->devinfo, send, inst->regs_written);
brw_inst_set_header_present(p->devinfo, send, true);
@@ -658,7 +665,7 @@ fs_generator::generate_get_buffer_size(fs_inst *inst,
retype(dst, BRW_REGISTER_TYPE_UW),
inst->base_mrf,
src,
- surf_index.dw1.ud,
+ surf_index.ud,
0,
GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
rlen, /* response length */
@@ -667,7 +674,7 @@ fs_generator::generate_get_buffer_size(fs_inst *inst,
simd_mode,
BRW_SAMPLER_RETURN_FORMAT_SINT32);
- brw_mark_surface_used(prog_data, surf_index.dw1.ud);
+ brw_mark_surface_used(prog_data, surf_index.ud);
}
void
@@ -741,6 +748,10 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
case SHADER_OPCODE_TXF:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
+ case SHADER_OPCODE_TXF_CMS_W:
+ assert(devinfo->gen >= 9);
+ msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
+ break;
case SHADER_OPCODE_TXF_CMS:
if (devinfo->gen >= 7)
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
@@ -905,7 +916,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
: prog_data->binding_table.texture_start;
if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
- uint32_t sampler = sampler_index.dw1.ud;
+ uint32_t sampler = sampler_index.ud;
brw_SAMPLE(p,
retype(dst, BRW_REGISTER_TYPE_UW),
@@ -1172,16 +1183,14 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
- uint32_t surf_index = index.dw1.ud;
+ uint32_t surf_index = index.ud;
assert(offset.file == BRW_IMMEDIATE_VALUE &&
offset.type == BRW_REGISTER_TYPE_UD);
- uint32_t read_offset = offset.dw1.ud;
+ uint32_t read_offset = offset.ud;
brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
read_offset, surf_index);
-
- brw_mark_surface_used(prog_data, surf_index);
}
void
@@ -1223,7 +1232,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
if (index.file == BRW_IMMEDIATE_VALUE) {
- uint32_t surf_index = index.dw1.ud;
+ uint32_t surf_index = index.ud;
brw_push_insn_state(p);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
@@ -1242,9 +1251,6 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
header_present,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
-
- brw_mark_surface_used(prog_data, surf_index);
-
} else {
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
@@ -1274,11 +1280,6 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
0);
brw_pop_insn_state(p);
-
- /* visitor knows more than we do about the surface limit required,
- * so has already done marking.
- */
-
}
}
@@ -1294,7 +1295,7 @@ fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
- uint32_t surf_index = index.dw1.ud;
+ uint32_t surf_index = index.ud;
uint32_t simd_mode, rlen, msg_type;
if (dispatch_width == 16) {
@@ -1345,8 +1346,6 @@ fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
inst->header_size != 0,
simd_mode,
return_format);
-
- brw_mark_surface_used(prog_data, surf_index);
}
void
@@ -1376,7 +1375,7 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
if (index.file == BRW_IMMEDIATE_VALUE) {
- uint32_t surf_index = index.dw1.ud;
+ uint32_t surf_index = index.ud;
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW));
@@ -1391,8 +1390,6 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
simd_mode,
0);
- brw_mark_surface_used(prog_data, surf_index);
-
} else {
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
@@ -1423,10 +1420,6 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
false /* header */,
simd_mode,
0);
-
- /* visitor knows more than we do about the surface limit required,
- * so has already done marking.
- */
}
}
@@ -2050,6 +2043,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_CMS:
+ case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_UMS:
case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TXL:
@@ -2067,7 +2061,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
case FS_OPCODE_DDY_COARSE:
case FS_OPCODE_DDY_FINE:
assert(src[1].file == BRW_IMMEDIATE_VALUE);
- generate_ddy(inst->opcode, dst, src[0], src[1].dw1.ud);
+ generate_ddy(inst->opcode, dst, src[0], src[1].ud);
break;
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
@@ -2086,6 +2080,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
break;
case SHADER_OPCODE_URB_READ_SIMD8:
+ case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
generate_urb_read(inst, dst, src[0]);
break;
@@ -2135,37 +2130,37 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
case SHADER_OPCODE_UNTYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_untyped_atomic(p, dst, src[0], src[1], src[2].dw1.ud,
+ brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud,
inst->mlen, !inst->dst.is_null());
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_read(p, dst, src[0], src[1],
- inst->mlen, src[2].dw1.ud);
+ inst->mlen, src[2].ud);
break;
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_write(p, src[0], src[1],
- inst->mlen, src[2].dw1.ud);
+ inst->mlen, src[2].ud);
break;
case SHADER_OPCODE_TYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_atomic(p, dst, src[0], src[1],
- src[2].dw1.ud, inst->mlen, !inst->dst.is_null());
+ src[2].ud, inst->mlen, !inst->dst.is_null());
break;
case SHADER_OPCODE_TYPED_SURFACE_READ:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_read(p, dst, src[0], src[1],
- inst->mlen, src[2].dw1.ud);
+ inst->mlen, src[2].ud);
break;
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].dw1.ud);
+ brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud);
break;
case SHADER_OPCODE_MEMORY_FENCE:
@@ -2267,6 +2262,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
brw_set_uip_jip(p);
annotation_finalize(&annotation, p->next_insn_offset);
+#ifndef NDEBUG
+ bool validated = brw_validate_instructions(p, start_offset, &annotation);
+#else
+ if (unlikely(debug_flag))
+ brw_validate_instructions(p, start_offset, &annotation);
+#endif
+
int before_size = p->next_insn_offset - start_offset;
brw_compact_instructions(p, start_offset, annotation.ann_count,
annotation.ann);
@@ -2282,8 +2284,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
dump_assembly(p->store, annotation.ann_count, annotation.ann,
p->devinfo);
- ralloc_free(annotation.ann);
+ ralloc_free(annotation.mem_ctx);
}
+ assert(validated);
compiler->shader_debug_log(log_data,
"%s SIMD%d shader: %d inst, %d loops, %u cycles, "
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index ce066a9778e..80fb8c28f81 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -117,7 +117,7 @@ fs_live_variables::setup_one_write(struct block_data *bd, fs_inst *inst,
/* The def[] bitset marks when an initialization in a block completely
* screens off previous updates of that variable (VGRF channel).
*/
- if (inst->dst.file == GRF && !inst->is_partial_write()) {
+ if (inst->dst.file == VGRF && !inst->is_partial_write()) {
if (!BITSET_TEST(bd->use, var))
BITSET_SET(bd->def, var);
}
@@ -149,7 +149,7 @@ fs_live_variables::setup_def_use()
for (unsigned int i = 0; i < inst->sources; i++) {
fs_reg reg = inst->src[i];
- if (reg.file != GRF)
+ if (reg.file != VGRF)
continue;
for (int j = 0; j < inst->regs_read(i); j++) {
@@ -172,7 +172,7 @@ fs_live_variables::setup_def_use()
}
/* Set def[] for this instruction */
- if (inst->dst.file == GRF) {
+ if (inst->dst.file == VGRF) {
fs_reg reg = inst->dst;
for (int j = 0; j < inst->regs_written; j++) {
setup_one_write(bd, inst, ip, reg);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
index c7457069ede..96cadea96aa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
@@ -68,7 +68,7 @@ public:
bool vars_interfere(int a, int b);
int var_from_reg(const fs_reg &reg) const
{
- return var_from_vgrf[reg.reg] + reg.reg_offset;
+ return var_from_vgrf[reg.nr] + reg.reg_offset;
}
/** Map from virtual GRF number to index in block_data arrays. */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 486741bea31..a47b6ce50cc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -28,6 +28,7 @@
#include "program/prog_to_nir.h"
#include "brw_fs.h"
#include "brw_fs_surface_builder.h"
+#include "brw_vec4_gs_visitor.h"
#include "brw_nir.h"
#include "brw_fs_surface_builder.h"
#include "brw_vec4_gs_visitor.h"
@@ -102,7 +103,8 @@ fs_visitor::nir_setup_outputs()
switch (stage) {
case MESA_SHADER_VERTEX:
- for (unsigned int i = 0; i < ALIGN(type_size_scalar(var->type), 4) / 4; i++) {
+ case MESA_SHADER_GEOMETRY:
+ for (int i = 0; i < type_size_vec4(var->type); i++) {
int output = var->data.location + i;
this->outputs[output] = offset(reg, bld, 4 * i);
this->output_components[output] = vector_elements;
@@ -260,6 +262,10 @@ void
fs_visitor::nir_emit_system_values()
{
nir_system_values = ralloc_array(mem_ctx, fs_reg, SYSTEM_VALUE_MAX);
+ for (unsigned i = 0; i < SYSTEM_VALUE_MAX; i++) {
+ nir_system_values[i] = fs_reg();
+ }
+
nir_foreach_overload(nir, overload) {
assert(strcmp(overload->function->name, "main") == 0);
assert(overload->impl);
@@ -270,7 +276,11 @@ fs_visitor::nir_emit_system_values()
void
fs_visitor::nir_emit_impl(nir_function_impl *impl)
{
- nir_locals = reralloc(mem_ctx, nir_locals, fs_reg, impl->reg_alloc);
+ nir_locals = ralloc_array(mem_ctx, fs_reg, impl->reg_alloc);
+ for (unsigned i = 0; i < impl->reg_alloc; i++) {
+ nir_locals[i] = fs_reg();
+ }
+
foreach_list_typed(nir_register, reg, node, &impl->registers) {
unsigned array_elems =
reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
@@ -358,7 +368,22 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
break;
case nir_instr_type_intrinsic:
- nir_emit_intrinsic(abld, nir_instr_as_intrinsic(instr));
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ nir_emit_vs_intrinsic(abld, nir_instr_as_intrinsic(instr));
+ break;
+ case MESA_SHADER_GEOMETRY:
+ nir_emit_gs_intrinsic(abld, nir_instr_as_intrinsic(instr));
+ break;
+ case MESA_SHADER_FRAGMENT:
+ nir_emit_fs_intrinsic(abld, nir_instr_as_intrinsic(instr));
+ break;
+ case MESA_SHADER_COMPUTE:
+ nir_emit_cs_intrinsic(abld, nir_instr_as_intrinsic(instr));
+ break;
+ default:
+ unreachable("unsupported shader stage");
+ }
break;
case nir_instr_type_tex:
@@ -1060,18 +1085,17 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
fs_reg image(UNIFORM, deref->var->data.driver_location,
BRW_REGISTER_TYPE_UD);
- if (deref->deref.child) {
- const nir_deref_array *deref_array =
- nir_deref_as_array(deref->deref.child);
- assert(deref->deref.child->deref_type == nir_deref_type_array &&
- deref_array->deref.child == NULL);
- const unsigned size = glsl_get_length(deref->var->type);
+ for (const nir_deref *tail = &deref->deref; tail->child;
+ tail = tail->child) {
+ const nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+ assert(tail->child->deref_type == nir_deref_type_array);
+ const unsigned size = glsl_get_length(tail->type);
+ const unsigned element_size = type_size_scalar(deref_array->deref.type);
const unsigned base = MIN2(deref_array->base_offset, size - 1);
-
- image = offset(image, bld, base * BRW_IMAGE_PARAM_SIZE);
+ image = offset(image, bld, base * element_size);
if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
- fs_reg *tmp = new(mem_ctx) fs_reg(vgrf(glsl_type::int_type));
+ fs_reg tmp = vgrf(glsl_type::int_type);
if (devinfo->gen == 7 && !devinfo->is_haswell) {
/* IVB hangs when trying to access an invalid surface index with
@@ -1082,15 +1106,18 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
* of the possible outcomes of the hang. Clamp the index to
* prevent access outside of the array bounds.
*/
- bld.emit_minmax(*tmp, retype(get_nir_src(deref_array->indirect),
- BRW_REGISTER_TYPE_UD),
+ bld.emit_minmax(tmp, retype(get_nir_src(deref_array->indirect),
+ BRW_REGISTER_TYPE_UD),
fs_reg(size - base - 1), BRW_CONDITIONAL_L);
} else {
- bld.MOV(*tmp, get_nir_src(deref_array->indirect));
+ bld.MOV(tmp, get_nir_src(deref_array->indirect));
}
- bld.MUL(*tmp, *tmp, fs_reg(BRW_IMAGE_PARAM_SIZE));
- image.reladdr = tmp;
+ bld.MUL(tmp, tmp, fs_reg(element_size));
+ if (image.reladdr)
+ bld.ADD(*image.reladdr, *image.reladdr, tmp);
+ else
+ image.reladdr = new(mem_ctx) fs_reg(tmp);
}
}
@@ -1108,7 +1135,7 @@ fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
fs_inst *new_inst = new(mem_ctx) fs_inst(inst);
new_inst->dst = offset(new_inst->dst, bld, i);
for (unsigned j = 0; j < new_inst->sources; j++)
- if (new_inst->src[j].file == GRF)
+ if (new_inst->src[j].file == VGRF)
new_inst->src[j] = offset(new_inst->src[j], bld, i);
bld.emit(new_inst);
@@ -1194,16 +1221,498 @@ emit_pixel_interpolater_send(const fs_builder &bld,
return inst;
}
+/**
+ * Computes 1 << x, given a D/UD register containing some value x.
+ */
+static fs_reg
+intexp2(const fs_builder &bld, const fs_reg &x)
+{
+ assert(x.type == BRW_REGISTER_TYPE_UD || x.type == BRW_REGISTER_TYPE_D);
+
+ fs_reg result = bld.vgrf(x.type, 1);
+ fs_reg one = bld.vgrf(x.type, 1);
+
+ bld.MOV(one, retype(fs_reg(1), one.type));
+ bld.SHL(result, one, x);
+ return result;
+}
+
void
-fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
+fs_visitor::emit_gs_end_primitive(const nir_src &vertex_count_nir_src)
+{
+ assert(stage == MESA_SHADER_GEOMETRY);
+
+ struct brw_gs_prog_data *gs_prog_data =
+ (struct brw_gs_prog_data *) prog_data;
+
+ /* We can only do EndPrimitive() functionality when the control data
+ * consists of cut bits. Fortunately, the only time it isn't is when the
+ * output type is points, in which case EndPrimitive() is a no-op.
+ */
+ if (gs_prog_data->control_data_format !=
+ GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT) {
+ return;
+ }
+
+ /* Cut bits use one bit per vertex. */
+ assert(gs_compile->control_data_bits_per_vertex == 1);
+
+ fs_reg vertex_count = get_nir_src(vertex_count_nir_src);
+ vertex_count.type = BRW_REGISTER_TYPE_UD;
+
+ /* Cut bit n should be set to 1 if EndPrimitive() was called after emitting
+ * vertex n, 0 otherwise. So all we need to do here is mark bit
+ * (vertex_count - 1) % 32 in the cut_bits register to indicate that
+ * EndPrimitive() was called after emitting vertex (vertex_count - 1);
+ * vec4_gs_visitor::emit_control_data_bits() will take care of the rest.
+ *
+ * Note that if EndPrimitive() is called before emitting any vertices, this
+ * will cause us to set bit 31 of the control_data_bits register to 1.
+ * That's fine because:
+ *
+ * - If max_vertices < 32, then vertex number 31 (zero-based) will never be
+ * output, so the hardware will ignore cut bit 31.
+ *
+ * - If max_vertices == 32, then vertex number 31 is guaranteed to be the
+ * last vertex, so setting cut bit 31 has no effect (since the primitive
+ * is automatically ended when the GS terminates).
+ *
+ * - If max_vertices > 32, then the ir_emit_vertex visitor will reset the
+ * control_data_bits register to 0 when the first vertex is emitted.
+ */
+
+ const fs_builder abld = bld.annotate("end primitive");
+
+ /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */
+ fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ abld.ADD(prev_count, vertex_count, fs_reg(0xffffffffu));
+ fs_reg mask = intexp2(abld, prev_count);
+ /* Note: we're relying on the fact that the GEN SHL instruction only pays
+ * attention to the lower 5 bits of its second source argument, so on this
+ * architecture, 1 << (vertex_count - 1) is equivalent to 1 <<
+ * ((vertex_count - 1) % 32).
+ */
+ abld.OR(this->control_data_bits, this->control_data_bits, mask);
+}
+
+void
+fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
{
+ assert(stage == MESA_SHADER_GEOMETRY);
+ assert(gs_compile->control_data_bits_per_vertex != 0);
+
+ struct brw_gs_prog_data *gs_prog_data =
+ (struct brw_gs_prog_data *) prog_data;
+
+ const fs_builder abld = bld.annotate("emit control data bits");
+ const fs_builder fwa_bld = bld.exec_all();
+
+ /* We use a single UD register to accumulate control data bits (32 bits
+ * for each of the SIMD8 channels). So we need to write a DWord (32 bits)
+ * at a time.
+ *
+ * Unfortunately, the URB_WRITE_SIMD8 message uses 128-bit (OWord) offsets.
+ * We have to select a 128-bit group via the Global and Per-Slot Offsets, then
+ * use the Channel Mask phase to enable/disable which DWord within that
+ * group to write. (Remember, different SIMD8 channels may have emitted
+ * different numbers of vertices, so we may need per-slot offsets.)
+ *
+ * Channel masking presents an annoying problem: we may have to replicate
+ * the data up to 4 times:
+ *
+ * Msg = Handles, Per-Slot Offsets, Channel Masks, Data, Data, Data, Data.
+ *
+ * To avoid penalizing shaders that emit a small number of vertices, we
+ * can avoid these sometimes: if the size of the control data header is
+ * <= 128 bits, then there is only 1 OWord. All SIMD8 channels will land
+ * in the same 128-bit group, so we can skip per-slot offsets.
+ *
+ * Similarly, if the control data header is <= 32 bits, there is only one
+ * DWord, so we can skip channel masks.
+ */
+ enum opcode opcode = SHADER_OPCODE_URB_WRITE_SIMD8;
+
+ fs_reg channel_mask, per_slot_offset;
+
+ if (gs_compile->control_data_header_size_bits > 32) {
+ opcode = SHADER_OPCODE_URB_WRITE_SIMD8_MASKED;
+ channel_mask = vgrf(glsl_type::uint_type);
+ }
+
+ if (gs_compile->control_data_header_size_bits > 128) {
+ opcode = SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT;
+ per_slot_offset = vgrf(glsl_type::uint_type);
+ }
+
+ /* Figure out which DWord we're trying to write to using the formula:
+ *
+ * dword_index = (vertex_count - 1) * bits_per_vertex / 32
+ *
+ * Since bits_per_vertex is a power of two, and is known at compile
+ * time, this can be optimized to:
+ *
+ * dword_index = (vertex_count - 1) >> (6 - fls(bits_per_vertex))
+ */
+ if (opcode != SHADER_OPCODE_URB_WRITE_SIMD8) {
+ fs_reg dword_index = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ abld.ADD(prev_count, vertex_count, fs_reg(0xffffffffu));
+ unsigned log2_bits_per_vertex =
+ _mesa_fls(gs_compile->control_data_bits_per_vertex);
+ abld.SHR(dword_index, prev_count, fs_reg(6u - log2_bits_per_vertex));
+
+ if (per_slot_offset.file != BAD_FILE) {
+ /* Set the per-slot offset to dword_index / 4, so that we'll write to
+ * the appropriate OWord within the control data header.
+ */
+ abld.SHR(per_slot_offset, dword_index, fs_reg(2u));
+ }
+
+ /* Set the channel masks to 1 << (dword_index % 4), so that we'll
+ * write to the appropriate DWORD within the OWORD.
+ */
+ fs_reg channel = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ fwa_bld.AND(channel, dword_index, fs_reg(3u));
+ channel_mask = intexp2(fwa_bld, channel);
+ /* Then the channel masks need to be in bits 23:16. */
+ fwa_bld.SHL(channel_mask, channel_mask, fs_reg(16u));
+ }
+
+ /* Store the control data bits in the message payload and send it. */
+ int mlen = 2;
+ if (channel_mask.file != BAD_FILE)
+ mlen += 4; /* channel masks, plus 3 extra copies of the data */
+ if (per_slot_offset.file != BAD_FILE)
+ mlen++;
+
+ fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, mlen);
+ fs_reg *sources = ralloc_array(mem_ctx, fs_reg, mlen);
+ int i = 0;
+ sources[i++] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
+ if (per_slot_offset.file != BAD_FILE)
+ sources[i++] = per_slot_offset;
+ if (channel_mask.file != BAD_FILE)
+ sources[i++] = channel_mask;
+ while (i < mlen) {
+ sources[i++] = this->control_data_bits;
+ }
+
+ abld.LOAD_PAYLOAD(payload, sources, mlen, mlen);
+ fs_inst *inst = abld.emit(opcode, reg_undef, payload);
+ inst->mlen = mlen;
+ /* We need to increment Global Offset by 256-bits to make room for
+ * Broadwell's extra "Vertex Count" payload at the beginning of the
+ * URB entry. Since this is an OWord message, Global Offset is counted
+ * in 128-bit units, so we must set it to 2.
+ */
+ if (gs_prog_data->static_vertex_count == -1)
+ inst->offset = 2;
+}
+
+void
+fs_visitor::set_gs_stream_control_data_bits(const fs_reg &vertex_count,
+ unsigned stream_id)
+{
+ /* control_data_bits |= stream_id << ((2 * (vertex_count - 1)) % 32) */
+
+ /* Note: we are calling this *before* increasing vertex_count, so
+ * this->vertex_count == vertex_count - 1 in the formula above.
+ */
+
+ /* Stream mode uses 2 bits per vertex */
+ assert(gs_compile->control_data_bits_per_vertex == 2);
+
+ /* Must be a valid stream */
+ assert(stream_id >= 0 && stream_id < MAX_VERTEX_STREAMS);
+
+ /* Control data bits are initialized to 0 so we don't have to set any
+ * bits when sending vertices to stream 0.
+ */
+ if (stream_id == 0)
+ return;
+
+ const fs_builder abld = bld.annotate("set stream control data bits", NULL);
+
+ /* reg::sid = stream_id */
+ fs_reg sid = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ abld.MOV(sid, fs_reg(stream_id));
+
+ /* reg:shift_count = 2 * (vertex_count - 1) */
+ fs_reg shift_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ abld.SHL(shift_count, vertex_count, fs_reg(1u));
+
+ /* Note: we're relying on the fact that the GEN SHL instruction only pays
+ * attention to the lower 5 bits of its second source argument, so on this
+ * architecture, stream_id << 2 * (vertex_count - 1) is equivalent to
+ * stream_id << ((2 * (vertex_count - 1)) % 32).
+ */
+ fs_reg mask = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ abld.SHL(mask, sid, shift_count);
+ abld.OR(this->control_data_bits, this->control_data_bits, mask);
+}
+
+void
+fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src,
+ unsigned stream_id)
+{
+ assert(stage == MESA_SHADER_GEOMETRY);
+
+ struct brw_gs_prog_data *gs_prog_data =
+ (struct brw_gs_prog_data *) prog_data;
+
+ fs_reg vertex_count = get_nir_src(vertex_count_nir_src);
+ vertex_count.type = BRW_REGISTER_TYPE_UD;
+
+ /* Haswell and later hardware ignores the "Render Stream Select" bits
+ * from the 3DSTATE_STREAMOUT packet when the SOL stage is disabled,
+ * and instead sends all primitives down the pipeline for rasterization.
+ * If the SOL stage is enabled, "Render Stream Select" is honored and
+ * primitives bound to non-zero streams are discarded after stream output.
+ *
+ * Since the only purpose of primitives sent to non-zero streams is to
+ * be recorded by transform feedback, we can simply discard all geometry
+ * bound to these streams when transform feedback is disabled.
+ */
+ if (stream_id > 0 && !nir->info.has_transform_feedback_varyings)
+ return;
+
+ /* If we're outputting 32 control data bits or less, then we can wait
+ * until the shader is over to output them all. Otherwise we need to
+ * output them as we go. Now is the time to do it, since we're about to
+ * output the vertex_count'th vertex, so it's guaranteed that the
+ * control data bits associated with the (vertex_count - 1)th vertex are
+ * correct.
+ */
+ if (gs_compile->control_data_header_size_bits > 32) {
+ const fs_builder abld =
+ bld.annotate("emit vertex: emit control data bits");
+
+ /* Only emit control data bits if we've finished accumulating a batch
+ * of 32 bits. This is the case when:
+ *
+ * (vertex_count * bits_per_vertex) % 32 == 0
+ *
+ * (in other words, when the last 5 bits of vertex_count *
+ * bits_per_vertex are 0). Assuming bits_per_vertex == 2^n for some
+ * integer n (which is always the case, since bits_per_vertex is
+ * always 1 or 2), this is equivalent to requiring that the last 5-n
+ * bits of vertex_count are 0:
+ *
+ * vertex_count & (2^(5-n) - 1) == 0
+ *
+ * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
+ * equivalent to:
+ *
+ * vertex_count & (32 / bits_per_vertex - 1) == 0
+ *
+ * TODO: If vertex_count is an immediate, we could do some of this math
+ * at compile time...
+ */
+ fs_inst *inst =
+ abld.AND(bld.null_reg_d(), vertex_count,
+ fs_reg(32u / gs_compile->control_data_bits_per_vertex - 1u));
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+ abld.IF(BRW_PREDICATE_NORMAL);
+ /* If vertex_count is 0, then no control data bits have been
+ * accumulated yet, so we can skip emitting them.
+ */
+ abld.CMP(bld.null_reg_d(), vertex_count, fs_reg(0u),
+ BRW_CONDITIONAL_NEQ);
+ abld.IF(BRW_PREDICATE_NORMAL);
+ emit_gs_control_data_bits(vertex_count);
+ abld.emit(BRW_OPCODE_ENDIF);
+
+ /* Reset control_data_bits to 0 so we can start accumulating a new
+ * batch.
+ *
+ * Note: in the case where vertex_count == 0, this neutralizes the
+ * effect of any call to EndPrimitive() that the shader may have
+ * made before outputting its first vertex.
+ */
+ inst = abld.MOV(this->control_data_bits, fs_reg(0u));
+ inst->force_writemask_all = true;
+ abld.emit(BRW_OPCODE_ENDIF);
+ }
+
+ emit_urb_writes(vertex_count);
+
+ /* In stream mode we have to set control data bits for all vertices
+ * unless we have disabled control data bits completely (which we do
+ * for GL_POINTS outputs that don't use streams).
+ */
+ if (gs_compile->control_data_header_size_bits > 0 &&
+ gs_prog_data->control_data_format ==
+ GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
+ set_gs_stream_control_data_bits(vertex_count, stream_id);
+ }
+}
+
+void
+fs_visitor::emit_gs_input_load(const fs_reg &dst,
+ const nir_src &vertex_src,
+ unsigned input_offset,
+ unsigned num_components)
+{
+ const brw_vue_prog_data *vue_prog_data = (const brw_vue_prog_data *) prog_data;
+ const unsigned vertex = nir_src_as_const_value(vertex_src)->u[0];
+
+ const unsigned array_stride = vue_prog_data->urb_read_length * 8;
+
+ const bool pushed = 4 * input_offset < array_stride;
+
+ if (input_offset == 0) {
+ /* This is the VUE header, containing VARYING_SLOT_LAYER [.y],
+ * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w].
+ * Only gl_PointSize is available as a GS input, so they must
+ * be asking for that input.
+ */
+ if (pushed) {
+ bld.MOV(dst, fs_reg(ATTR, array_stride * vertex + 3, dst.type));
+ } else {
+ fs_reg tmp = bld.vgrf(dst.type, 4);
+ fs_inst *inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
+ fs_reg(vertex), fs_reg(0));
+ inst->regs_written = 4;
+ bld.MOV(dst, offset(tmp, bld, 3));
+ }
+ } else {
+ if (pushed) {
+ int index = vertex * array_stride + 4 * input_offset;
+ for (unsigned i = 0; i < num_components; i++) {
+ bld.MOV(offset(dst, bld, i), fs_reg(ATTR, index + i, dst.type));
+ }
+ } else {
+ fs_inst *inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst,
+ fs_reg(vertex), fs_reg(input_offset));
+ inst->regs_written = num_components;
+ }
+ }
+}
+
+void
+fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
+ nir_intrinsic_instr *instr)
+{
+ assert(stage == MESA_SHADER_VERTEX);
+
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
dest = get_nir_dest(instr->dest);
- bool has_indirect = false;
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_vertex_id:
+ unreachable("should be lowered by lower_vertex_id()");
+
+ case nir_intrinsic_load_vertex_id_zero_base:
+ case nir_intrinsic_load_base_vertex:
+ case nir_intrinsic_load_instance_id: {
+ gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+ fs_reg val = nir_system_values[sv];
+ assert(val.file != BAD_FILE);
+ dest.type = val.type;
+ bld.MOV(dest, val);
+ break;
+ }
+
+ default:
+ nir_emit_intrinsic(bld, instr);
+ break;
+ }
+}
+
+void
+fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
+ nir_intrinsic_instr *instr)
+{
+ assert(stage == MESA_SHADER_GEOMETRY);
+
+ fs_reg dest;
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+ dest = get_nir_dest(instr->dest);
switch (instr->intrinsic) {
+ case nir_intrinsic_load_primitive_id:
+ assert(stage == MESA_SHADER_GEOMETRY);
+ assert(((struct brw_gs_prog_data *)prog_data)->include_primitive_id);
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
+ retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD));
+ break;
+
+ case nir_intrinsic_load_input_indirect:
+ case nir_intrinsic_load_input:
+ unreachable("load_input intrinsics are invalid for the GS stage");
+
+ case nir_intrinsic_load_per_vertex_input_indirect:
+ assert(!"Not allowed");
+ case nir_intrinsic_load_per_vertex_input:
+ emit_gs_input_load(dest, instr->src[0], instr->const_index[0],
+ instr->num_components);
+ break;
+
+ case nir_intrinsic_emit_vertex_with_counter:
+ emit_gs_vertex(instr->src[0], instr->const_index[0]);
+ break;
+
+ case nir_intrinsic_end_primitive_with_counter:
+ emit_gs_end_primitive(instr->src[0]);
+ break;
+
+ case nir_intrinsic_set_vertex_count:
+ bld.MOV(this->final_gs_vertex_count, get_nir_src(instr->src[0]));
+ break;
+
+ case nir_intrinsic_load_invocation_id: {
+ fs_reg val = nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
+ assert(val.file != BAD_FILE);
+ dest.type = val.type;
+ bld.MOV(dest, val);
+ break;
+ }
+
+ default:
+ nir_emit_intrinsic(bld, instr);
+ break;
+ }
+}
+
+void
+fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
+ nir_intrinsic_instr *instr)
+{
+ assert(stage == MESA_SHADER_FRAGMENT);
+ struct brw_wm_prog_data *wm_prog_data =
+ (struct brw_wm_prog_data *) prog_data;
+
+ fs_reg dest;
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+ dest = get_nir_dest(instr->dest);
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_front_face:
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
+ *emit_frontfacing_interpolation());
+ break;
+
+ case nir_intrinsic_load_sample_pos: {
+ fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
+ assert(sample_pos.file != BAD_FILE);
+ dest.type = sample_pos.type;
+ bld.MOV(dest, sample_pos);
+ bld.MOV(offset(dest, bld, 1), offset(sample_pos, bld, 1));
+ break;
+ }
+
+ case nir_intrinsic_load_sample_mask_in:
+ case nir_intrinsic_load_sample_id: {
+ gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+ fs_reg val = nir_system_values[sv];
+ assert(val.file != BAD_FILE);
+ dest.type = val.type;
+ bld.MOV(dest, val);
+ break;
+ }
+
case nir_intrinsic_discard:
case nir_intrinsic_discard_if: {
/* We track our discarded pixels in f0.1. By predicating on it, we can
@@ -1229,6 +1738,248 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
+ case nir_intrinsic_interp_var_at_centroid:
+ case nir_intrinsic_interp_var_at_sample:
+ case nir_intrinsic_interp_var_at_offset: {
+ /* Handle ARB_gpu_shader5 interpolation intrinsics
+ *
+ * It's worth a quick word of explanation as to why we handle the full
+ * variable-based interpolation intrinsic rather than a lowered version
+ * like we do for other inputs. We have to do that because the way
+ * we set up inputs doesn't allow us to use the already setup inputs for
+ * interpolation. At the beginning of the shader, we go through all of
+ * the input variables and do the initial interpolation and put it in
+ * the nir_inputs array based on its location as determined in
+ * nir_lower_io. If the input isn't used, dead code cleans up and
+ * everything works fine. However, when we get to the ARB_gpu_shader5
+ * interpolation intrinsics, we need to reinterpolate the input
+ * differently. If we used an intrinsic that just had an index it would
+ * only give us the offset into the nir_inputs array. However, this is
+ * useless because that value is post-interpolation and we need
+ * pre-interpolation. In order to get the actual location of the bits
+ * we get from the vertex fetching hardware, we need the variable.
+ */
+ wm_prog_data->pulls_bary = true;
+
+ fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
+ const glsl_interp_qualifier interpolation =
+ (glsl_interp_qualifier) instr->variables[0]->var->data.interpolation;
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_interp_var_at_centroid:
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_CENTROID,
+ dst_xy,
+ fs_reg(), /* src */
+ fs_reg(0u),
+ interpolation);
+ break;
+
+ case nir_intrinsic_interp_var_at_sample: {
+ nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
+
+ if (const_sample) {
+ unsigned msg_data = const_sample->i[0] << 4;
+
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+ dst_xy,
+ fs_reg(), /* src */
+ fs_reg(msg_data),
+ interpolation);
+ } else {
+ const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
+ BRW_REGISTER_TYPE_UD);
+
+ if (nir_src_is_dynamically_uniform(instr->src[0])) {
+ const fs_reg sample_id = bld.emit_uniformize(sample_src);
+ const fs_reg msg_data = vgrf(glsl_type::uint_type);
+ bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+ dst_xy,
+ fs_reg(), /* src */
+ msg_data,
+ interpolation);
+ } else {
+ /* Make a loop that sends a message to the pixel interpolater
+ * for the sample number in each live channel. If there are
+ * multiple channels with the same sample number then these
+ * will be handled simultaneously with a single iteration of
+ * the loop.
+ */
+ bld.emit(BRW_OPCODE_DO);
+
+ /* Get the next live sample number into sample_id */
+ const fs_reg sample_id = bld.emit_uniformize(sample_src);
+
+ /* Set the flag register so that we can perform the send
+ * message on all channels that have the same sample number
+ */
+ bld.CMP(bld.null_reg_ud(),
+ sample_src, sample_id,
+ BRW_CONDITIONAL_EQ);
+ const fs_reg msg_data = vgrf(glsl_type::uint_type);
+ bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+ fs_inst *inst =
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+ dst_xy,
+ fs_reg(), /* src */
+ msg_data,
+ interpolation);
+ set_predicate(BRW_PREDICATE_NORMAL, inst);
+
+ /* Continue the loop if there are any live channels left */
+ set_predicate_inv(BRW_PREDICATE_NORMAL,
+ true, /* inverse */
+ bld.emit(BRW_OPCODE_WHILE));
+ }
+ }
+
+ break;
+ }
+
+ case nir_intrinsic_interp_var_at_offset: {
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+
+ if (const_offset) {
+ unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
+ unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
+
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
+ dst_xy,
+ fs_reg(), /* src */
+ fs_reg(off_x | (off_y << 4)),
+ interpolation);
+ } else {
+ fs_reg src = vgrf(glsl_type::ivec2_type);
+ fs_reg offset_src = retype(get_nir_src(instr->src[0]),
+ BRW_REGISTER_TYPE_F);
+ for (int i = 0; i < 2; i++) {
+ fs_reg temp = vgrf(glsl_type::float_type);
+ bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f));
+ fs_reg itemp = vgrf(glsl_type::int_type);
+ bld.MOV(itemp, temp); /* float to int */
+
+ /* Clamp the upper end of the range to +7/16.
+ * ARB_gpu_shader5 requires that we support a maximum offset
+ * of +0.5, which isn't representable in a S0.4 value -- if
+ * we didn't clamp it, we'd end up with -8/16, which is the
+ * opposite of what the shader author wanted.
+ *
+ * This is legal due to ARB_gpu_shader5's quantization
+ * rules:
+ *
+ * "Not all values of <offset> may be supported; x and y
+ * offsets may be rounded to fixed-point values with the
+ * number of fraction bits given by the
+ * implementation-dependent constant
+ * FRAGMENT_INTERPOLATION_OFFSET_BITS"
+ */
+ set_condmod(BRW_CONDITIONAL_L,
+ bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
+ }
+
+ const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
+ emit_pixel_interpolater_send(bld,
+ opcode,
+ dst_xy,
+ src,
+ fs_reg(0u),
+ interpolation);
+ }
+ break;
+ }
+
+ default:
+ unreachable("Invalid intrinsic");
+ }
+
+ for (unsigned j = 0; j < instr->num_components; j++) {
+ fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
+ src.type = dest.type;
+
+ bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
+ dest = offset(dest, bld, 1);
+ }
+ break;
+ }
+ default:
+ nir_emit_intrinsic(bld, instr);
+ break;
+ }
+}
+
+void
+fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
+ nir_intrinsic_instr *instr)
+{
+ assert(stage == MESA_SHADER_COMPUTE);
+ struct brw_cs_prog_data *cs_prog_data =
+ (struct brw_cs_prog_data *) prog_data;
+
+ fs_reg dest;
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+ dest = get_nir_dest(instr->dest);
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_barrier:
+ emit_barrier();
+ cs_prog_data->uses_barrier = true;
+ break;
+
+ case nir_intrinsic_load_local_invocation_id:
+ case nir_intrinsic_load_work_group_id: {
+ gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+ fs_reg val = nir_system_values[sv];
+ assert(val.file != BAD_FILE);
+ dest.type = val.type;
+ for (unsigned i = 0; i < 3; i++)
+ bld.MOV(offset(dest, bld, i), offset(val, bld, i));
+ break;
+ }
+
+ case nir_intrinsic_load_num_work_groups: {
+ const unsigned surface =
+ cs_prog_data->binding_table.work_groups_start;
+
+ cs_prog_data->uses_num_work_groups = true;
+
+ fs_reg surf_index = fs_reg(surface);
+ brw_mark_surface_used(prog_data, surface);
+
+ /* Read the 3 GLuint components of gl_NumWorkGroups */
+ for (unsigned i = 0; i < 3; i++) {
+ fs_reg read_result =
+ emit_untyped_read(bld, surf_index,
+ fs_reg(i << 2),
+ 1 /* dims */, 1 /* size */,
+ BRW_PREDICATE_NONE);
+ read_result.type = dest.type;
+ bld.MOV(dest, read_result);
+ dest = offset(dest, bld, 1);
+ }
+ break;
+ }
+
+ default:
+ nir_emit_intrinsic(bld, instr);
+ break;
+ }
+}
+
+void
+fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
+{
+ fs_reg dest;
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+ dest = get_nir_dest(instr->dest);
+
+ bool has_indirect = false;
+
+ switch (instr->intrinsic) {
case nir_intrinsic_atomic_counter_inc:
case nir_intrinsic_atomic_counter_dec:
case nir_intrinsic_atomic_counter_read: {
@@ -1324,6 +2075,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
+ case nir_intrinsic_memory_barrier_atomic_counter:
+ case nir_intrinsic_memory_barrier_buffer:
+ case nir_intrinsic_memory_barrier_image:
case nir_intrinsic_memory_barrier: {
const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 16 / dispatch_width);
bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
@@ -1331,6 +2085,29 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
+ case nir_intrinsic_group_memory_barrier:
+ case nir_intrinsic_memory_barrier_shared:
+ /* We treat these workgroup-level barriers as no-ops. This should be
+ * safe at present and as long as:
+ *
+ * - Memory access instructions are not subsequently reordered by the
+ * compiler back-end.
+ *
+ * - All threads from a given compute shader workgroup fit within a
+ * single subslice and therefore talk to the same HDC shared unit
+ * which supposedly guarantees ordering and coherency between threads
+ * from the same workgroup. This may change in the future when we
+ * start splitting workgroups across multiple subslices.
+ *
+ * - The context is not in fault-and-stream mode, which could cause
+ * memory transactions (including to SLM) prior to the barrier to be
+ * replayed after the barrier if a pagefault occurs. This shouldn't
+ * be a problem up to and including SKL because fault-and-stream is
+ * not usable due to hardware issues, but that's likely to change in
+ * the future.
+ */
+ break;
+
case nir_intrinsic_shader_clock: {
/* We cannot do anything if there is an event, so ignore it for now */
fs_reg shader_clock = get_timestamp(bld);
@@ -1390,44 +2167,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1));
break;
- case nir_intrinsic_load_front_face:
- bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
- *emit_frontfacing_interpolation());
- break;
-
- case nir_intrinsic_load_vertex_id:
- unreachable("should be lowered by lower_vertex_id()");
-
- case nir_intrinsic_load_primitive_id:
- assert(stage == MESA_SHADER_GEOMETRY);
- assert(((struct brw_gs_prog_data *)prog_data)->include_primitive_id);
- bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
- retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD));
- break;
-
- case nir_intrinsic_load_vertex_id_zero_base:
- case nir_intrinsic_load_base_vertex:
- case nir_intrinsic_load_instance_id:
- case nir_intrinsic_load_invocation_id:
- case nir_intrinsic_load_sample_mask_in:
- case nir_intrinsic_load_sample_id: {
- gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
- fs_reg val = nir_system_values[sv];
- assert(val.file != BAD_FILE);
- dest.type = val.type;
- bld.MOV(dest, val);
- break;
- }
-
- case nir_intrinsic_load_sample_pos: {
- fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
- assert(sample_pos.file != BAD_FILE);
- dest.type = sample_pos.type;
- bld.MOV(dest, sample_pos);
- bld.MOV(offset(dest, bld, 1), offset(sample_pos, bld, 1));
- break;
- }
-
case nir_intrinsic_load_uniform_indirect:
has_indirect = true;
/* fallthrough */
@@ -1454,8 +2193,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fs_reg surf_index;
if (const_index) {
- surf_index = fs_reg(stage_prog_data->binding_table.ubo_start +
- const_index->u[0]);
+ const unsigned index = stage_prog_data->binding_table.ubo_start +
+ const_index->u[0];
+ surf_index = fs_reg(index);
+ brw_mark_surface_used(prog_data, index);
} else {
/* The block index is not a constant. Evaluate the index expression
* per-channel and add the base UBO index; we have to select a value
@@ -1579,177 +2320,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- /* Handle ARB_gpu_shader5 interpolation intrinsics
- *
- * It's worth a quick word of explanation as to why we handle the full
- * variable-based interpolation intrinsic rather than a lowered version
- * with like we do for other inputs. We have to do that because the way
- * we set up inputs doesn't allow us to use the already setup inputs for
- * interpolation. At the beginning of the shader, we go through all of
- * the input variables and do the initial interpolation and put it in
- * the nir_inputs array based on its location as determined in
- * nir_lower_io. If the input isn't used, dead code cleans up and
- * everything works fine. However, when we get to the ARB_gpu_shader5
- * interpolation intrinsics, we need to reinterpolate the input
- * differently. If we used an intrinsic that just had an index it would
- * only give us the offset into the nir_inputs array. However, this is
- * useless because that value is post-interpolation and we need
- * pre-interpolation. In order to get the actual location of the bits
- * we get from the vertex fetching hardware, we need the variable.
- */
- case nir_intrinsic_interp_var_at_centroid:
- case nir_intrinsic_interp_var_at_sample:
- case nir_intrinsic_interp_var_at_offset: {
- assert(stage == MESA_SHADER_FRAGMENT);
-
- ((struct brw_wm_prog_data *) prog_data)->pulls_bary = true;
-
- fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
- const glsl_interp_qualifier interpolation =
- (glsl_interp_qualifier) instr->variables[0]->var->data.interpolation;
-
- switch (instr->intrinsic) {
- case nir_intrinsic_interp_var_at_centroid:
- emit_pixel_interpolater_send(bld,
- FS_OPCODE_INTERPOLATE_AT_CENTROID,
- dst_xy,
- fs_reg(), /* src */
- fs_reg(0u),
- interpolation);
- break;
-
- case nir_intrinsic_interp_var_at_sample: {
- nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
-
- if (const_sample) {
- unsigned msg_data = const_sample->i[0] << 4;
-
- emit_pixel_interpolater_send(bld,
- FS_OPCODE_INTERPOLATE_AT_SAMPLE,
- dst_xy,
- fs_reg(), /* src */
- fs_reg(msg_data),
- interpolation);
- } else {
- const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
- BRW_REGISTER_TYPE_UD);
-
- if (nir_src_is_dynamically_uniform(instr->src[0])) {
- const fs_reg sample_id = bld.emit_uniformize(sample_src);
- const fs_reg msg_data = vgrf(glsl_type::uint_type);
- bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
- emit_pixel_interpolater_send(bld,
- FS_OPCODE_INTERPOLATE_AT_SAMPLE,
- dst_xy,
- fs_reg(), /* src */
- msg_data,
- interpolation);
- } else {
- /* Make a loop that sends a message to the pixel interpolater
- * for the sample number in each live channel. If there are
- * multiple channels with the same sample number then these
- * will be handled simultaneously with a single interation of
- * the loop.
- */
- bld.emit(BRW_OPCODE_DO);
-
- /* Get the next live sample number into sample_id_reg */
- const fs_reg sample_id = bld.emit_uniformize(sample_src);
-
- /* Set the flag register so that we can perform the send
- * message on all channels that have the same sample number
- */
- bld.CMP(bld.null_reg_ud(),
- sample_src, sample_id,
- BRW_CONDITIONAL_EQ);
- const fs_reg msg_data = vgrf(glsl_type::uint_type);
- bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
- fs_inst *inst =
- emit_pixel_interpolater_send(bld,
- FS_OPCODE_INTERPOLATE_AT_SAMPLE,
- dst_xy,
- fs_reg(), /* src */
- msg_data,
- interpolation);
- set_predicate(BRW_PREDICATE_NORMAL, inst);
-
- /* Continue the loop if there are any live channels left */
- set_predicate_inv(BRW_PREDICATE_NORMAL,
- true, /* inverse */
- bld.emit(BRW_OPCODE_WHILE));
- }
- }
-
- break;
- }
-
- case nir_intrinsic_interp_var_at_offset: {
- nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
-
- if (const_offset) {
- unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
- unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
-
- emit_pixel_interpolater_send(bld,
- FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
- dst_xy,
- fs_reg(), /* src */
- fs_reg(off_x | (off_y << 4)),
- interpolation);
- } else {
- fs_reg src = vgrf(glsl_type::ivec2_type);
- fs_reg offset_src = retype(get_nir_src(instr->src[0]),
- BRW_REGISTER_TYPE_F);
- for (int i = 0; i < 2; i++) {
- fs_reg temp = vgrf(glsl_type::float_type);
- bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f));
- fs_reg itemp = vgrf(glsl_type::int_type);
- bld.MOV(itemp, temp); /* float to int */
-
- /* Clamp the upper end of the range to +7/16.
- * ARB_gpu_shader5 requires that we support a maximum offset
- * of +0.5, which isn't representable in a S0.4 value -- if
- * we didn't clamp it, we'd end up with -8/16, which is the
- * opposite of what the shader author wanted.
- *
- * This is legal due to ARB_gpu_shader5's quantization
- * rules:
- *
- * "Not all values of <offset> may be supported; x and y
- * offsets may be rounded to fixed-point values with the
- * number of fraction bits given by the
- * implementation-dependent constant
- * FRAGMENT_INTERPOLATION_OFFSET_BITS"
- */
- set_condmod(BRW_CONDITIONAL_L,
- bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
- }
-
- const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
- emit_pixel_interpolater_send(bld,
- opcode,
- dst_xy,
- src,
- fs_reg(0u),
- interpolation);
- }
- break;
- }
-
- default:
- unreachable("Invalid intrinsic");
- }
-
- for (unsigned j = 0; j < instr->num_components; j++) {
- fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
- src.type = dest.type;
-
- bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
- dest = offset(dest, bld, 1);
- }
- break;
- }
-
case nir_intrinsic_store_ssbo_indirect:
has_indirect = true;
/* fallthrough */
@@ -1831,23 +2401,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- case nir_intrinsic_barrier:
- emit_barrier();
- if (stage == MESA_SHADER_COMPUTE)
- ((struct brw_cs_prog_data *) prog_data)->uses_barrier = true;
- break;
-
- case nir_intrinsic_load_local_invocation_id:
- case nir_intrinsic_load_work_group_id: {
- gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
- fs_reg val = nir_system_values[sv];
- assert(val.file != BAD_FILE);
- dest.type = val.type;
- for (unsigned i = 0; i < 3; i++)
- bld.MOV(offset(dest, bld, i), offset(val, bld, i));
- break;
- }
-
case nir_intrinsic_ssbo_atomic_add:
nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr);
break;
@@ -1888,44 +2441,30 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fs_reg source = fs_reg(0);
int mlen = 1 * reg_width;
- fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
+
+ /* A resinfo's sampler message is used to get the buffer size.
+ * The SIMD8's writeback message consists of four registers and
+ * SIMD16's writeback message consists of 8 destination registers
+ * (two per component), although we are only interested in the
+ * first component, where resinfo returns the buffer size for
+ * SURFTYPE_BUFFER.
+ */
+ int regs_written = 4 * mlen;
+ fs_reg src_payload = fs_reg(VGRF, alloc.allocate(mlen),
BRW_REGISTER_TYPE_UD);
bld.LOAD_PAYLOAD(src_payload, &source, 1, 0);
-
- fs_reg surf_index = fs_reg(prog_data->binding_table.ssbo_start + ssbo_index);
- fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest,
- src_payload, surf_index);
+ fs_reg buffer_size = fs_reg(VGRF, alloc.allocate(regs_written),
+ BRW_REGISTER_TYPE_UD);
+ const unsigned index = prog_data->binding_table.ssbo_start + ssbo_index;
+ fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, buffer_size,
+ src_payload, fs_reg(index));
inst->header_size = 0;
inst->mlen = mlen;
+ inst->regs_written = regs_written;
bld.emit(inst);
- break;
- }
-
- case nir_intrinsic_load_num_work_groups: {
- assert(devinfo->gen >= 7);
- assert(stage == MESA_SHADER_COMPUTE);
-
- struct brw_cs_prog_data *cs_prog_data =
- (struct brw_cs_prog_data *) prog_data;
- const unsigned surface =
- cs_prog_data->binding_table.work_groups_start;
+ bld.MOV(retype(dest, buffer_size.type), buffer_size);
- cs_prog_data->uses_num_work_groups = true;
-
- fs_reg surf_index = fs_reg(surface);
- brw_mark_surface_used(prog_data, surface);
-
- /* Read the 3 GLuint components of gl_NumWorkGroups */
- for (unsigned i = 0; i < 3; i++) {
- fs_reg read_result =
- emit_untyped_read(bld, surf_index,
- fs_reg(i << 2),
- 1 /* dims */, 1 /* size */,
- BRW_PREDICATE_NONE);
- read_result.type = dest.type;
- bld.MOV(dest, read_result);
- dest = offset(dest, bld, 1);
- }
+ brw_mark_surface_used(prog_data, index);
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 9251d9552a5..1b61f9fe01c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -35,8 +35,8 @@ using namespace brw;
static void
assign_reg(unsigned *reg_hw_locations, fs_reg *reg)
{
- if (reg->file == GRF) {
- reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset;
+ if (reg->file == VGRF) {
+ reg->nr = reg_hw_locations[reg->nr] + reg->reg_offset;
reg->reg_offset = 0;
}
}
@@ -366,14 +366,13 @@ void fs_visitor::calculate_payload_ranges(int payload_node_count,
else
use_ip = ip;
- /* Note that UNIFORM args have been turned into FIXED_HW_REG by
+ /* Note that UNIFORM args have been turned into FIXED_GRF by
* assign_curbe_setup(), and interpolation uses fixed hardware regs from
* the start (see interp_reg()).
*/
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == HW_REG &&
- inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
- int node_nr = inst->src[i].fixed_hw_reg.nr;
+ if (inst->src[i].file == FIXED_GRF) {
+ int node_nr = inst->src[i].nr;
if (node_nr >= payload_node_count)
continue;
@@ -489,10 +488,10 @@ get_used_mrfs(fs_visitor *v, bool *mrf_used)
foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
if (inst->dst.file == MRF) {
- int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
+ int reg = inst->dst.nr & ~BRW_MRF_COMPR4;
mrf_used[reg] = true;
if (reg_width == 2) {
- if (inst->dst.reg & BRW_MRF_COMPR4) {
+ if (inst->dst.nr & BRW_MRF_COMPR4) {
mrf_used[reg + 4] = true;
} else {
mrf_used[reg + 1] = true;
@@ -584,8 +583,8 @@ fs_visitor::assign_regs(bool allow_spilling)
* that register and set it to the appropriate class.
*/
if (compiler->fs_reg_sets[rsi].aligned_pairs_class >= 0 &&
- this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF &&
- this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) {
+ this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == VGRF &&
+ this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].nr == i) {
c = compiler->fs_reg_sets[rsi].aligned_pairs_class;
}
@@ -616,7 +615,7 @@ fs_visitor::assign_regs(bool allow_spilling)
* highest register that works.
*/
if (inst->eot) {
- int size = alloc.sizes[inst->src[0].reg];
+ int size = alloc.sizes[inst->src[0].nr];
int reg = compiler->fs_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
/* If something happened to spill, we want to push the EOT send
@@ -625,7 +624,7 @@ fs_visitor::assign_regs(bool allow_spilling)
*/
reg -= BRW_MAX_MRF(devinfo->gen) - first_used_mrf;
- ra_set_node_reg(g, inst->src[0].reg, reg);
+ ra_set_node_reg(g, inst->src[0].nr, reg);
break;
}
}
@@ -644,12 +643,12 @@ fs_visitor::assign_regs(bool allow_spilling)
* destination interfere.
*/
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- if (inst->dst.file != GRF)
+ if (inst->dst.file != VGRF)
continue;
for (int i = 0; i < inst->sources; ++i) {
- if (inst->src[i].file == GRF) {
- ra_add_node_interference(g, inst->dst.reg, inst->src[i].reg);
+ if (inst->src[i].file == VGRF) {
+ ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr);
}
}
}
@@ -786,8 +785,8 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
*/
foreach_block_and_inst(block, fs_inst, inst, cfg) {
for (unsigned int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF) {
- spill_costs[inst->src[i].reg] += loop_scale;
+ if (inst->src[i].file == VGRF) {
+ spill_costs[inst->src[i].nr] += loop_scale;
/* Register spilling logic assumes full-width registers; smeared
* registers have a width of 1 so if we try to spill them we'll
@@ -797,16 +796,16 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
* register pressure anyhow.
*/
if (!inst->src[i].is_contiguous()) {
- no_spill[inst->src[i].reg] = true;
+ no_spill[inst->src[i].nr] = true;
}
}
}
- if (inst->dst.file == GRF) {
- spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;
+ if (inst->dst.file == VGRF) {
+ spill_costs[inst->dst.nr] += inst->regs_written * loop_scale;
if (!inst->dst.is_contiguous()) {
- no_spill[inst->dst.reg] = true;
+ no_spill[inst->dst.nr] = true;
}
}
@@ -821,14 +820,14 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
break;
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
- if (inst->src[0].file == GRF)
- no_spill[inst->src[0].reg] = true;
+ if (inst->src[0].file == VGRF)
+ no_spill[inst->src[0].nr] = true;
break;
case SHADER_OPCODE_GEN4_SCRATCH_READ:
case SHADER_OPCODE_GEN7_SCRATCH_READ:
- if (inst->dst.file == GRF)
- no_spill[inst->dst.reg] = true;
+ if (inst->dst.file == VGRF)
+ no_spill[inst->dst.nr] = true;
break;
default:
@@ -883,14 +882,14 @@ fs_visitor::spill_reg(int spill_reg)
*/
foreach_block_and_inst (block, fs_inst, inst, cfg) {
for (unsigned int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF &&
- inst->src[i].reg == spill_reg) {
+ if (inst->src[i].file == VGRF &&
+ inst->src[i].nr == spill_reg) {
int regs_read = inst->regs_read(i);
int subset_spill_offset = (spill_offset +
REG_SIZE * inst->src[i].reg_offset);
- fs_reg unspill_dst(GRF, alloc.allocate(regs_read));
+ fs_reg unspill_dst(VGRF, alloc.allocate(regs_read));
- inst->src[i].reg = unspill_dst.reg;
+ inst->src[i].nr = unspill_dst.nr;
inst->src[i].reg_offset = 0;
emit_unspill(block, inst, unspill_dst, subset_spill_offset,
@@ -898,13 +897,13 @@ fs_visitor::spill_reg(int spill_reg)
}
}
- if (inst->dst.file == GRF &&
- inst->dst.reg == spill_reg) {
+ if (inst->dst.file == VGRF &&
+ inst->dst.nr == spill_reg) {
int subset_spill_offset = (spill_offset +
REG_SIZE * inst->dst.reg_offset);
- fs_reg spill_src(GRF, alloc.allocate(inst->regs_written));
+ fs_reg spill_src(VGRF, alloc.allocate(inst->regs_written));
- inst->dst.reg = spill_src.reg;
+ inst->dst.nr = spill_src.nr;
inst->dst.reg_offset = 0;
/* If we're immediately spilling the register, we should not use
diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
index 34f8715eeb9..4578ad597c2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
@@ -70,17 +70,17 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)
inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD) ||
inst->is_partial_write() ||
inst->saturate ||
- inst->src[0].file != GRF ||
+ inst->src[0].file != VGRF ||
inst->src[0].negate ||
inst->src[0].abs ||
!inst->src[0].is_contiguous() ||
- inst->dst.file != GRF ||
+ inst->dst.file != VGRF ||
inst->dst.type != inst->src[0].type) {
return false;
}
- if (v->alloc.sizes[inst->src[0].reg] >
- v->alloc.sizes[inst->dst.reg])
+ if (v->alloc.sizes[inst->src[0].nr] >
+ v->alloc.sizes[inst->dst.nr])
return false;
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
@@ -170,19 +170,19 @@ fs_visitor::register_coalesce()
continue;
}
- if (src_reg != inst->src[0].reg) {
- src_reg = inst->src[0].reg;
+ if (src_reg != inst->src[0].nr) {
+ src_reg = inst->src[0].nr;
- src_size = alloc.sizes[inst->src[0].reg];
+ src_size = alloc.sizes[inst->src[0].nr];
assert(src_size <= MAX_VGRF_SIZE);
channels_remaining = src_size;
memset(mov, 0, sizeof(mov));
- dst_reg = inst->dst.reg;
+ dst_reg = inst->dst.nr;
}
- if (dst_reg != inst->dst.reg)
+ if (dst_reg != inst->dst.nr)
continue;
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
@@ -250,17 +250,17 @@ fs_visitor::register_coalesce()
}
foreach_block_and_inst(block, fs_inst, scan_inst, cfg) {
- if (scan_inst->dst.file == GRF &&
- scan_inst->dst.reg == src_reg) {
- scan_inst->dst.reg = dst_reg;
+ if (scan_inst->dst.file == VGRF &&
+ scan_inst->dst.nr == src_reg) {
+ scan_inst->dst.nr = dst_reg;
scan_inst->dst.reg_offset =
dst_reg_offset[scan_inst->dst.reg_offset];
}
for (int j = 0; j < scan_inst->sources; j++) {
- if (scan_inst->src[j].file == GRF &&
- scan_inst->src[j].reg == src_reg) {
- scan_inst->src[j].reg = dst_reg;
+ if (scan_inst->src[j].file == VGRF &&
+ scan_inst->src[j].nr == src_reg) {
+ scan_inst->src[j].nr = dst_reg;
scan_inst->src[j].reg_offset =
dst_reg_offset[scan_inst->src[j].reg_offset];
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
index 862e3245d43..52570943996 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
@@ -53,9 +53,9 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
if (inst->opcode != BRW_OPCODE_MOV ||
!inst->saturate ||
- inst->dst.file != GRF ||
+ inst->dst.file != VGRF ||
inst->dst.type != inst->src[0].type ||
- inst->src[0].file != GRF ||
+ inst->src[0].file != VGRF ||
inst->src[0].abs ||
inst->src[0].negate)
continue;
@@ -90,8 +90,8 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
break;
}
for (int i = 0; i < scan_inst->sources; i++) {
- if (scan_inst->src[i].file == GRF &&
- scan_inst->src[i].reg == inst->src[0].reg &&
+ if (scan_inst->src[i].file == VGRF &&
+ scan_inst->src[i].nr == inst->src[0].nr &&
scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
if (scan_inst->opcode != BRW_OPCODE_MOV ||
!scan_inst->saturate ||
diff --git a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
index 814c551f1be..90edd023b30 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
@@ -42,15 +42,15 @@ void
fs_visitor::validate()
{
foreach_block_and_inst (block, fs_inst, inst, cfg) {
- if (inst->dst.file == GRF) {
+ if (inst->dst.file == VGRF) {
fsv_assert(inst->dst.reg_offset + inst->regs_written <=
- alloc.sizes[inst->dst.reg]);
+ alloc.sizes[inst->dst.nr]);
}
for (unsigned i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF) {
+ if (inst->src[i].file == VGRF) {
fsv_assert(inst->src[i].reg_offset + inst->regs_read(i) <=
- (int)alloc.sizes[inst->src[i].reg]);
+ (int)alloc.sizes[inst->src[i].nr]);
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 5c57944ca39..a7bd9cea7af 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -143,7 +143,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
* tracking to get the scaling factor.
*/
if (devinfo->gen < 6 && is_rect) {
- fs_reg dst = fs_reg(GRF, alloc.allocate(coord_components));
+ fs_reg dst = fs_reg(VGRF, alloc.allocate(coord_components));
fs_reg src = coordinate;
coordinate = dst;
@@ -208,8 +208,8 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs,
ARRAY_SIZE(srcs));
- /* We only care about one reg of response, but the sampler always writes
- * 4/8.
+ /* We only care about one or two regs of response, but the sampler always
+ * writes 4/8.
*/
inst->regs_written = 4 * dispatch_width / 8;
@@ -295,7 +295,10 @@ fs_visitor::emit_texture(ir_texture_opcode op,
opcode = SHADER_OPCODE_TXF_LOGICAL;
break;
case ir_txf_ms:
- opcode = SHADER_OPCODE_TXF_CMS_LOGICAL;
+ if ((key_tex->msaa_16 & (1 << sampler)))
+ opcode = SHADER_OPCODE_TXF_CMS_W_LOGICAL;
+ else
+ opcode = SHADER_OPCODE_TXF_CMS_LOGICAL;
break;
case ir_txs:
case ir_query_levels:
@@ -319,7 +322,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
inst->shadow_compare = true;
if (offset_value.file == IMM)
- inst->offset = offset_value.fixed_hw_reg.dw1.ud;
+ inst->offset = offset_value.ud;
if (op == ir_tg4) {
inst->offset |=
@@ -578,7 +581,7 @@ fs_visitor::emit_interpolation_setup_gen6()
* Thus we can do a single add(16) in SIMD8 or an add(32) in SIMD16 to
* compute our pixel centers.
*/
- fs_reg int_pixel_xy(GRF, alloc.allocate(dispatch_width / 8),
+ fs_reg int_pixel_xy(VGRF, alloc.allocate(dispatch_width / 8),
BRW_REGISTER_TYPE_UW);
const fs_builder dbld = abld.exec_all().group(dispatch_width * 2, 0);
@@ -873,14 +876,14 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
abld.MUL(output, outputs[clip_vertex], u);
for (int j = 1; j < 4; j++) {
- u.reg = userplane[i].reg + j;
+ u.nr = userplane[i].nr + j;
abld.MAD(output, output, offset(outputs[clip_vertex], bld, j), u);
}
}
}
void
-fs_visitor::emit_urb_writes()
+fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
{
int slot, urb_offset, length;
int starting_urb_offset = 0;
@@ -905,7 +908,7 @@ fs_visitor::emit_urb_writes()
* "The write data payload can be between 1 and 8 message phases long."
*/
if (vue_map->slots_valid == 0) {
- fs_reg payload = fs_reg(GRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD);
+ fs_reg payload = fs_reg(VGRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD);
bld.exec_all().MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0),
BRW_REGISTER_TYPE_UD)));
@@ -916,9 +919,13 @@ fs_visitor::emit_urb_writes()
return;
}
+ opcode opcode = SHADER_OPCODE_URB_WRITE_SIMD8;
+ int header_size = 1;
+ fs_reg per_slot_offsets;
+
if (stage == MESA_SHADER_GEOMETRY) {
const struct brw_gs_prog_data *gs_prog_data =
- (const struct brw_gs_prog_data *) prog_data;
+ (const struct brw_gs_prog_data *) this->prog_data;
/* We need to increment the Global Offset to skip over the control data
* header and the extra "Vertex Count" field (1 HWord) at the beginning
@@ -927,6 +934,27 @@ fs_visitor::emit_urb_writes()
starting_urb_offset = 2 * gs_prog_data->control_data_header_size_hwords;
if (gs_prog_data->static_vertex_count == -1)
starting_urb_offset += 2;
+
+ /* We also need to use per-slot offsets. The per-slot offset is the
+ * Vertex Count. SIMD8 mode processes 8 different primitives at a
+ * time; each may output a different number of vertices.
+ */
+ opcode = SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT;
+ header_size++;
+
+ /* The URB offset is in 128-bit units, so we need to multiply by 2 */
+ const int output_vertex_size_owords =
+ gs_prog_data->output_vertex_size_hwords * 2;
+
+ fs_reg offset;
+ if (gs_vertex_count.file == IMM) {
+ per_slot_offsets = fs_reg(output_vertex_size_owords *
+ gs_vertex_count.ud);
+ } else {
+ per_slot_offsets = vgrf(glsl_type::int_type);
+ bld.MUL(per_slot_offsets, gs_vertex_count,
+ fs_reg(output_vertex_size_owords));
+ }
}
length = 0;
@@ -947,7 +975,7 @@ fs_visitor::emit_urb_writes()
break;
}
- fs_reg zero(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ fs_reg zero(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
bld.MOV(zero, fs_reg(0u));
sources[length++] = zero;
@@ -999,7 +1027,7 @@ fs_visitor::emit_urb_writes()
* temp register and use that for the payload.
*/
for (int i = 0; i < 4; i++) {
- fs_reg reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type);
+ fs_reg reg = fs_reg(VGRF, alloc.allocate(1), outputs[varying].type);
fs_reg src = offset(this->outputs[varying], bld, i);
set_saturate(true, bld.MOV(reg, src));
sources[length++] = reg;
@@ -1023,19 +1051,25 @@ fs_visitor::emit_urb_writes()
if (length == 8 || last)
flush = true;
if (flush) {
- fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1);
- fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1),
+ fs_reg *payload_sources =
+ ralloc_array(mem_ctx, fs_reg, length + header_size);
+ fs_reg payload = fs_reg(VGRF, alloc.allocate(length + header_size),
BRW_REGISTER_TYPE_F);
payload_sources[0] =
fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
- memcpy(&payload_sources[1], sources, length * sizeof sources[0]);
- abld.LOAD_PAYLOAD(payload, payload_sources, length + 1, 1);
+ if (opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT)
+ payload_sources[1] = per_slot_offsets;
- fs_inst *inst =
- abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
+ memcpy(&payload_sources[header_size], sources,
+ length * sizeof sources[0]);
+
+ abld.LOAD_PAYLOAD(payload, payload_sources, length + header_size,
+ header_size);
+
+ fs_inst *inst = abld.emit(opcode, reg_undef, payload);
inst->eot = last && stage == MESA_SHADER_VERTEX;
- inst->mlen = length + 1;
+ inst->mlen = length + header_size;
inst->offset = urb_offset;
urb_offset = starting_urb_offset + slot + 1;
length = 0;
@@ -1057,7 +1091,7 @@ fs_visitor::emit_cs_terminate()
* make sure it uses the appropriate register range.
*/
struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
- fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ fs_reg payload = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
bld.group(8, 0).exec_all().MOV(payload, g0);
/* Send a message to the thread spawner to terminate the thread. */
@@ -1074,7 +1108,7 @@ fs_visitor::emit_barrier()
/* We are getting the barrier ID from the compute shader header */
assert(stage == MESA_SHADER_COMPUTE);
- fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ fs_reg payload = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
const fs_builder pbld = bld.exec_all().group(8, 0);
@@ -1112,13 +1146,14 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
struct brw_gs_compile *c,
struct brw_gs_prog_data *prog_data,
- const nir_shader *shader)
+ const nir_shader *shader,
+ int shader_time_index)
: backend_shader(compiler, log_data, mem_ctx, shader,
&prog_data->base.base),
key(&c->key), gs_compile(c),
prog_data(&prog_data->base.base), prog(NULL),
dispatch_width(8),
- shader_time_index(ST_GS),
+ shader_time_index(shader_time_index),
bld(fs_builder(this, dispatch_width).at_end())
{
init();
@@ -1155,7 +1190,6 @@ fs_visitor::init()
this->nir_ssa_values = NULL;
memset(&this->payload, 0, sizeof(this->payload));
- memset(this->outputs, 0, sizeof(this->outputs));
memset(this->output_components, 0, sizeof(this->output_components));
this->source_depth_to_render_target = false;
this->runtime_check_aads_emit = false;
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index 4417555f18e..7e977e9e727 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -41,9 +41,9 @@ public:
explicit fs_reg(uint32_t u);
explicit fs_reg(uint8_t vf[4]);
explicit fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3);
- fs_reg(struct brw_reg fixed_hw_reg);
- fs_reg(enum register_file file, int reg);
- fs_reg(enum register_file file, int reg, enum brw_reg_type type);
+ fs_reg(struct brw_reg reg);
+ fs_reg(enum brw_reg_file file, int nr);
+ fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type);
bool equals(const fs_reg &r) const;
bool is_contiguous() const;
@@ -72,7 +72,7 @@ public:
static inline fs_reg
negate(fs_reg reg)
{
- assert(reg.file != HW_REG && reg.file != IMM);
+ assert(reg.file != IMM);
reg.negate = !reg.negate;
return reg;
}
@@ -80,7 +80,7 @@ negate(fs_reg reg)
static inline fs_reg
retype(fs_reg reg, enum brw_reg_type type)
{
- reg.fixed_hw_reg.type = reg.type = type;
+ reg.type = type;
return reg;
}
@@ -90,15 +90,16 @@ byte_offset(fs_reg reg, unsigned delta)
switch (reg.file) {
case BAD_FILE:
break;
- case GRF:
+ case VGRF:
case ATTR:
reg.reg_offset += delta / 32;
break;
case MRF:
- reg.reg += delta / 32;
+ reg.nr += delta / 32;
break;
+ case ARF:
+ case FIXED_GRF:
case IMM:
- case HW_REG:
case UNIFORM:
assert(delta == 0);
}
@@ -117,11 +118,12 @@ horiz_offset(fs_reg reg, unsigned delta)
* horizontal offset should be a harmless no-op.
*/
break;
- case GRF:
+ case VGRF:
case MRF:
case ATTR:
return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
- case HW_REG:
+ case ARF:
+ case FIXED_GRF:
assert(delta == 0);
}
return reg;
@@ -159,12 +161,13 @@ half(fs_reg reg, unsigned idx)
case IMM:
return reg;
- case GRF:
+ case VGRF:
case MRF:
return horiz_offset(reg, 8 * idx);
+ case ARF:
+ case FIXED_GRF:
case ATTR:
- case HW_REG:
unreachable("Cannot take half of this register type");
}
return reg;
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 29642c6d2a4..110e64b979e 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -39,7 +39,7 @@ public:
void init();
- src_reg(register_file file, int reg, const glsl_type *type);
+ src_reg(enum brw_reg_file file, int nr, const glsl_type *type);
src_reg();
src_reg(float f);
src_reg(uint32_t u);
@@ -55,22 +55,21 @@ public:
explicit src_reg(const dst_reg &reg);
- unsigned swizzle; /**< BRW_SWIZZLE_XYZW macros from brw_reg.h. */
-
src_reg *reladdr;
};
static inline src_reg
retype(src_reg reg, enum brw_reg_type type)
{
- reg.fixed_hw_reg.type = reg.type = type;
+ reg.type = type;
return reg;
}
static inline src_reg
offset(src_reg reg, unsigned delta)
{
- assert(delta == 0 || (reg.file != HW_REG && reg.file != IMM));
+ assert(delta == 0 ||
+ (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));
reg.reg_offset += delta;
return reg;
}
@@ -82,7 +81,6 @@ offset(src_reg reg, unsigned delta)
static inline src_reg
swizzle(src_reg reg, unsigned swizzle)
{
- assert(reg.file != HW_REG);
reg.swizzle = brw_compose_swizzle(swizzle, reg.swizzle);
return reg;
}
@@ -90,7 +88,7 @@ swizzle(src_reg reg, unsigned swizzle)
static inline src_reg
negate(src_reg reg)
{
- assert(reg.file != HW_REG && reg.file != IMM);
+ assert(reg.file != IMM);
reg.negate = !reg.negate;
return reg;
}
@@ -110,10 +108,10 @@ public:
void init();
dst_reg();
- dst_reg(register_file file, int reg);
- dst_reg(register_file file, int reg, const glsl_type *type,
+ dst_reg(enum brw_reg_file file, int nr);
+ dst_reg(enum brw_reg_file file, int nr, const glsl_type *type,
unsigned writemask);
- dst_reg(register_file file, int reg, brw_reg_type type,
+ dst_reg(enum brw_reg_file file, int nr, brw_reg_type type,
unsigned writemask);
dst_reg(struct brw_reg reg);
dst_reg(class vec4_visitor *v, const struct glsl_type *type);
@@ -122,22 +120,21 @@ public:
bool equals(const dst_reg &r) const;
- unsigned writemask; /**< Bitfield of WRITEMASK_[XYZW] */
-
src_reg *reladdr;
};
static inline dst_reg
retype(dst_reg reg, enum brw_reg_type type)
{
- reg.fixed_hw_reg.type = reg.type = type;
+ reg.type = type;
return reg;
}
static inline dst_reg
offset(dst_reg reg, unsigned delta)
{
- assert(delta == 0 || (reg.file != HW_REG && reg.file != IMM));
+ assert(delta == 0 ||
+ (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));
reg.reg_offset += delta;
return reg;
}
@@ -145,7 +142,7 @@ offset(dst_reg reg, unsigned delta)
static inline dst_reg
writemask(dst_reg reg, unsigned mask)
{
- assert(reg.file != HW_REG && reg.file != IMM);
+ assert(reg.file != IMM);
assert((reg.writemask & mask) != 0);
reg.writemask &= mask;
return reg;
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
index fc9bee43d80..29911732761 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -157,8 +157,6 @@ process_glsl_ir(gl_shader_stage stage,
_mesa_shader_stage_to_abbrev(shader->Stage));
}
- lower_ubo_reference(shader, shader->ir);
-
bool progress;
do {
progress = false;
diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index fbde3f04204..12e7c32e424 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -314,8 +314,7 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
}
static void
-get_buffer_rect(struct brw_context *brw, struct gl_framebuffer *fb,
- struct intel_renderbuffer *irb, struct rect *rect)
+get_buffer_rect(const struct gl_framebuffer *fb, struct rect *rect)
{
rect->x0 = fb->_Xmin;
rect->x1 = fb->_Xmax;
@@ -526,16 +525,18 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
case REP_CLEAR:
rep_clear_buffers |= 1 << index;
- get_buffer_rect(brw, fb, irb, &clear_rect);
+ get_buffer_rect(fb, &clear_rect);
break;
case PLAIN_CLEAR:
plain_clear_buffers |= 1 << index;
- get_buffer_rect(brw, fb, irb, &clear_rect);
+ get_buffer_rect(fb, &clear_rect);
continue;
}
}
+ assert((fast_clear_buffers & rep_clear_buffers) == 0);
+
if (!(fast_clear_buffers | rep_clear_buffers)) {
if (plain_clear_buffers)
/* If we only have plain clears, skip the meta save/restore. */
diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
index cbbb919c6ee..4e9aa949506 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
@@ -163,6 +163,13 @@ static const char *fs_tmpl =
" txl_coords.x = ((X & int(0xfff8)) >> 2) | (X & int(0x1));\n"
" txl_coords.y = ((Y & int(0xfffc)) >> 1) | (Y & int(0x1));\n"
" sample_index = (X & 0x4) | (Y & 0x2) | ((X & 0x2) >> 1);\n"
+ " break;\n"
+ " case 16:\n"
+ " txl_coords.x = ((X & int(0xfff8)) >> 2) | (X & int(0x1));\n"
+ " txl_coords.y = ((Y & int(0xfff8)) >> 2) | (Y & int(0x1));\n"
+ " sample_index = (((Y & 0x4) << 1) | (X & 0x4) | (Y & 0x2) |\n"
+ " ((X & 0x2) >> 1));\n"
+ " break;\n"
" }\n"
"}\n"
"\n"
@@ -313,11 +320,16 @@ adjust_msaa(struct blit_dims *dims, int num_samples)
dims->dst_x0 *= 2;
dims->dst_x1 *= 2;
} else if (num_samples) {
- const int x_num_samples = num_samples / 2;
- dims->dst_x0 = ROUND_DOWN_TO(dims->dst_x0 * x_num_samples, num_samples);
- dims->dst_y0 = ROUND_DOWN_TO(dims->dst_y0 * 2, 4);
- dims->dst_x1 = ALIGN(dims->dst_x1 * x_num_samples, num_samples);
- dims->dst_y1 = ALIGN(dims->dst_y1 * 2, 4);
+ const int y_num_samples = num_samples >= 16 ? 4 : 2;
+ const int x_num_samples = num_samples / y_num_samples;
+ dims->dst_x0 = ROUND_DOWN_TO(dims->dst_x0 * x_num_samples,
+ x_num_samples * 2);
+ dims->dst_y0 = ROUND_DOWN_TO(dims->dst_y0 * y_num_samples,
+ y_num_samples * 2);
+ dims->dst_x1 = ALIGN(dims->dst_x1 * x_num_samples,
+ x_num_samples * 2);
+ dims->dst_y1 = ALIGN(dims->dst_y1 * y_num_samples,
+ y_num_samples * 2);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_multisample_state.h b/src/mesa/drivers/dri/i965/brw_multisample_state.h
index 26633e72983..42a7fd35121 100644
--- a/src/mesa/drivers/dri/i965/brw_multisample_state.h
+++ b/src/mesa/drivers/dri/i965/brw_multisample_state.h
@@ -81,3 +81,29 @@ brw_multisample_positions_4x = 0xae2ae662;
*/
static const uint32_t
brw_multisample_positions_8x[] = { 0xdbb39d79, 0x3ff55117 };
+
+/**
+ * Sample positions:
+ *
+ * 0 1 2 3 4 5 6 7 8 9 a b c d e f
+ * 0 15
+ * 1 9
+ * 2 10
+ * 3 7
+ * 4 13
+ * 5 1
+ * 6 4
+ * 7 3
+ * 8 12
+ * 9 0
+ * a 2
+ * b 6
+ * c 11
+ * d 5
+ * e 8
+ * f 14
+ */
+static const uint32_t
+brw_multisample_positions_16x[] = {
+ 0xc75a7599, 0xb3dbad36, 0x2c42816e, 0x10eff408
+};
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 8c1a34ee17a..58754adc887 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -56,7 +56,8 @@ remap_vs_attrs(nir_block *block, void *closure)
}
static void
-brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
+brw_nir_lower_inputs(const struct brw_device_info *devinfo,
+ nir_shader *nir, bool is_scalar)
{
switch (nir->stage) {
case MESA_SHADER_VERTEX:
@@ -90,11 +91,43 @@ brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
}
}
break;
- case MESA_SHADER_GEOMETRY:
- foreach_list_typed(nir_variable, var, node, &nir->inputs) {
- var->data.driver_location = var->data.location;
+ case MESA_SHADER_GEOMETRY: {
+ if (!is_scalar) {
+ foreach_list_typed(nir_variable, var, node, &nir->inputs) {
+ var->data.driver_location = var->data.location;
+ }
+ } else {
+ /* The GLSL linker will have already matched up GS inputs and
+ * the outputs of prior stages. The driver does extend VS outputs
+ * in some cases, but only for legacy OpenGL or Gen4-5 hardware,
+ * neither of which offer geometry shader support. So we can
+ * safely ignore that.
+ *
+ * For SSO pipelines, we use a fixed VUE map layout based on variable
+ * locations, so we can rely on rendezvous-by-location to make this
+ * work.
+ *
+ * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
+ * written by previous stages and shows up via payload magic.
+ */
+ struct brw_vue_map input_vue_map;
+ GLbitfield64 inputs_read =
+ nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID;
+ brw_compute_vue_map(devinfo, &input_vue_map, inputs_read,
+ nir->info.separate_shader);
+
+ /* Start with the slot for the variable's base. */
+ foreach_list_typed(nir_variable, var, node, &nir->inputs) {
+ assert(input_vue_map.varying_to_slot[var->data.location] != -1);
+ var->data.driver_location =
+ input_vue_map.varying_to_slot[var->data.location];
+ }
+
+ /* Inputs are stored in vec4 slots, so use type_size_vec4(). */
+ nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
}
break;
+ }
case MESA_SHADER_FRAGMENT:
assert(is_scalar);
nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
@@ -117,7 +150,8 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
case MESA_SHADER_GEOMETRY:
if (is_scalar) {
nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
- type_size_scalar);
+ type_size_vec4_times_4);
+ nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
} else {
nir_foreach_variable(var, &nir->outputs)
var->data.driver_location = var->data.location;
@@ -187,6 +221,7 @@ brw_create_nir(struct brw_context *brw,
bool is_scalar)
{
struct gl_context *ctx = &brw->ctx;
+ const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
const nir_shader_compiler_options *options =
ctx->Const.ShaderCompilerOptions[stage].NirOptions;
nir_shader *nir;
@@ -267,7 +302,7 @@ brw_postprocess_nir(nir_shader *nir,
bool debug_enabled =
(INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage));
- brw_nir_lower_inputs(nir, is_scalar);
+ brw_nir_lower_inputs(devinfo, nir, is_scalar);
brw_nir_lower_outputs(nir, is_scalar);
nir_assign_var_locations(&nir->uniforms,
&nir->num_uniforms,
@@ -285,7 +320,7 @@ brw_postprocess_nir(nir_shader *nir,
if (devinfo->gen >= 6) {
/* Try and fuse multiply-adds */
- nir_opt_peephole_ffma(nir);
+ brw_nir_opt_peephole_ffma(nir);
nir_validate_shader(nir);
}
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index a6d6768795a..d259777e1c9 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -103,6 +103,8 @@ void brw_nir_setup_glsl_uniforms(nir_shader *shader,
void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
struct brw_stage_prog_data *stage_prog_data);
+bool brw_nir_opt_peephole_ffma(nir_shader *shader);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
new file mode 100644
index 00000000000..5603129bde7
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#include "brw_nir.h"
+
+/*
+ * Implements a small peephole optimization that looks for a multiply that
+ * is only ever used in an add and replaces both with an fma.
+ */
+
+struct peephole_ffma_state {
+ void *mem_ctx;
+ nir_function_impl *impl;
+ bool progress;
+};
+
+static inline bool
+are_all_uses_fadd(nir_ssa_def *def)
+{
+ if (!list_empty(&def->if_uses))
+ return false;
+
+ nir_foreach_use(def, use_src) {
+ nir_instr *use_instr = use_src->parent_instr;
+
+ if (use_instr->type != nir_instr_type_alu)
+ return false;
+
+ nir_alu_instr *use_alu = nir_instr_as_alu(use_instr);
+ switch (use_alu->op) {
+ case nir_op_fadd:
+ break; /* This one's ok */
+
+ case nir_op_imov:
+ case nir_op_fmov:
+ case nir_op_fneg:
+ case nir_op_fabs:
+ assert(use_alu->dest.dest.is_ssa);
+ if (!are_all_uses_fadd(&use_alu->dest.dest.ssa))
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static nir_alu_instr *
+get_mul_for_src(nir_alu_src *src, int num_components,
+ uint8_t swizzle[4], bool *negate, bool *abs)
+{
+ uint8_t swizzle_tmp[4];
+ assert(src->src.is_ssa && !src->abs && !src->negate);
+
+ nir_instr *instr = src->src.ssa->parent_instr;
+ if (instr->type != nir_instr_type_alu)
+ return NULL;
+
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ switch (alu->op) {
+ case nir_op_imov:
+ case nir_op_fmov:
+ alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
+ break;
+
+ case nir_op_fneg:
+ alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
+ *negate = !*negate;
+ break;
+
+ case nir_op_fabs:
+ alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
+ *negate = false;
+ *abs = true;
+ break;
+
+ case nir_op_fmul:
+ /* Only absorb a fmul into a ffma if the fmul is only used in fadd
+ * operations. This prevents us from being too aggressive with our
+ * fusing which can actually lead to more instructions.
+ */
+ if (!are_all_uses_fadd(&alu->dest.dest.ssa))
+ return NULL;
+ break;
+
+ default:
+ return NULL;
+ }
+
+ if (!alu)
+ return NULL;
+
+ /* Copy swizzle data before overwriting it to avoid setting a wrong swizzle.
+ *
+ * Example:
+ * Former swizzle[] = xyzw
+ * src->swizzle[] = zyxx
+ *
+ * Expected output swizzle = zyxx
+ * If we reuse swizzle in the loop, then output swizzle would be zyzz.
+ */
+ memcpy(swizzle_tmp, swizzle, 4*sizeof(uint8_t));
+ for (int i = 0; i < num_components; i++)
+ swizzle[i] = swizzle_tmp[src->swizzle[i]];
+
+ return alu;
+}
+
+/**
+ * Given a list of (at least two) nir_alu_src's, tells if any of them is a
+ * constant value and is used only once.
+ */
+static bool
+any_alu_src_is_a_constant(nir_alu_src srcs[])
+{
+ for (unsigned i = 0; i < 2; i++) {
+ if (srcs[i].src.ssa->parent_instr->type == nir_instr_type_load_const) {
+ nir_load_const_instr *load_const =
+ nir_instr_as_load_const (srcs[i].src.ssa->parent_instr);
+
+ if (list_is_singular(&load_const->def.uses) &&
+ list_empty(&load_const->def.if_uses)) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static bool
+brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
+{
+ struct peephole_ffma_state *state = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_alu)
+ continue;
+
+ nir_alu_instr *add = nir_instr_as_alu(instr);
+ if (add->op != nir_op_fadd)
+ continue;
+
+ /* TODO: Maybe bail if this expression is considered "precise"? */
+
+ assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa);
+
+ /* This is the case a + a. We would rather handle this with an
+ * algebraic reduction than fuse it. Also, we want to only fuse
+ * things where the multiply is used only once and, in this case,
+ * it would be used twice by the same instruction.
+ */
+ if (add->src[0].src.ssa == add->src[1].src.ssa)
+ continue;
+
+ nir_alu_instr *mul;
+ uint8_t add_mul_src, swizzle[4];
+ bool negate, abs;
+ for (add_mul_src = 0; add_mul_src < 2; add_mul_src++) {
+ for (unsigned i = 0; i < 4; i++)
+ swizzle[i] = i;
+
+ negate = false;
+ abs = false;
+
+ mul = get_mul_for_src(&add->src[add_mul_src],
+ add->dest.dest.ssa.num_components,
+ swizzle, &negate, &abs);
+
+ if (mul != NULL)
+ break;
+ }
+
+ if (mul == NULL)
+ continue;
+
+ nir_ssa_def *mul_src[2];
+ mul_src[0] = mul->src[0].src.ssa;
+ mul_src[1] = mul->src[1].src.ssa;
+
+ /* If both the fmul and the fadd have a constant operand that is used only
+ * once, we bypass because it will be more efficient as the constants will be
+ * propagated as operands, potentially saving two load_const instructions.
+ */
+ if (any_alu_src_is_a_constant(mul->src) &&
+ any_alu_src_is_a_constant(add->src)) {
+ continue;
+ }
+
+ if (abs) {
+ for (unsigned i = 0; i < 2; i++) {
+ nir_alu_instr *abs = nir_alu_instr_create(state->mem_ctx,
+ nir_op_fabs);
+ abs->src[0].src = nir_src_for_ssa(mul_src[i]);
+ nir_ssa_dest_init(&abs->instr, &abs->dest.dest,
+ mul_src[i]->num_components, NULL);
+ abs->dest.write_mask = (1 << mul_src[i]->num_components) - 1;
+ nir_instr_insert_before(&add->instr, &abs->instr);
+ mul_src[i] = &abs->dest.dest.ssa;
+ }
+ }
+
+ if (negate) {
+ nir_alu_instr *neg = nir_alu_instr_create(state->mem_ctx,
+ nir_op_fneg);
+ neg->src[0].src = nir_src_for_ssa(mul_src[0]);
+ nir_ssa_dest_init(&neg->instr, &neg->dest.dest,
+ mul_src[0]->num_components, NULL);
+ neg->dest.write_mask = (1 << mul_src[0]->num_components) - 1;
+ nir_instr_insert_before(&add->instr, &neg->instr);
+ mul_src[0] = &neg->dest.dest.ssa;
+ }
+
+ nir_alu_instr *ffma = nir_alu_instr_create(state->mem_ctx, nir_op_ffma);
+ ffma->dest.saturate = add->dest.saturate;
+ ffma->dest.write_mask = add->dest.write_mask;
+
+ for (unsigned i = 0; i < 2; i++) {
+ ffma->src[i].src = nir_src_for_ssa(mul_src[i]);
+ for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++)
+ ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]];
+ }
+ nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src], ffma);
+
+ assert(add->dest.dest.is_ssa);
+
+ nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest,
+ add->dest.dest.ssa.num_components,
+ add->dest.dest.ssa.name);
+ nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
+ nir_src_for_ssa(&ffma->dest.dest.ssa));
+
+ nir_instr_insert_before(&add->instr, &ffma->instr);
+ assert(list_empty(&add->dest.dest.ssa.uses));
+ nir_instr_remove(&add->instr);
+
+ state->progress = true;
+ }
+
+ return true;
+}
+
+static bool
+brw_nir_opt_peephole_ffma_impl(nir_function_impl *impl)
+{
+ struct peephole_ffma_state state;
+
+ state.mem_ctx = ralloc_parent(impl);
+ state.impl = impl;
+ state.progress = false;
+
+ nir_foreach_block(impl, brw_nir_opt_peephole_ffma_block, &state);
+
+ if (state.progress)
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return state.progress;
+}
+
+bool
+brw_nir_opt_peephole_ffma(nir_shader *shader)
+{
+ bool progress = false;
+
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl)
+ progress |= brw_nir_opt_peephole_ffma_impl(overload->impl);
+ }
+
+ return progress;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index d3326e9fb86..87b383919df 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -98,6 +98,8 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
if (storage->type->is_image()) {
brw_setup_image_uniform_values(stage, stage_prog_data,
uniform_index, storage);
+ uniform_index +=
+ BRW_IMAGE_PARAM_SIZE * MAX2(storage->array_elements, 1);
} else {
gl_constant_value *components = storage->storage;
unsigned vector_count = (MAX2(storage->array_elements, 1) *
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index 083c46a3726..3da83b43b5d 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -219,7 +219,7 @@ enum PACKED brw_reg_type {
};
unsigned brw_reg_type_to_hw_type(const struct brw_device_info *devinfo,
- enum brw_reg_type type, unsigned file);
+ enum brw_reg_type type, enum brw_reg_file file);
const char *brw_reg_type_letters(unsigned brw_reg_type);
#define REG_SIZE (8*4)
@@ -232,29 +232,29 @@ const char *brw_reg_type_letters(unsigned brw_reg_type);
*/
struct brw_reg {
enum brw_reg_type type:4;
- unsigned file:2;
- unsigned nr:8;
- unsigned subnr:5; /* :1 in align16 */
+ enum brw_reg_file file:3; /* :2 hardware format */
unsigned negate:1; /* source only */
unsigned abs:1; /* source only */
- unsigned vstride:4; /* source only */
- unsigned width:3; /* src only, align1 only */
- unsigned hstride:2; /* align1 only */
unsigned address_mode:1; /* relative addressing, hopefully! */
unsigned pad0:1;
+ unsigned subnr:5; /* :1 in align16 */
+ unsigned nr:16;
union {
struct {
unsigned swizzle:8; /* src only, align16 only */
unsigned writemask:4; /* dest only, align16 only */
int indirect_offset:10; /* relative addressing offset */
- unsigned pad1:10; /* two dwords total */
- } bits;
+ unsigned vstride:4; /* source only */
+ unsigned width:3; /* src only, align1 only */
+ unsigned hstride:2; /* align1 only */
+ unsigned pad1:1;
+ };
float f;
int d;
unsigned ud;
- } dw1;
+ };
};
@@ -329,7 +329,7 @@ type_is_signed(unsigned type)
* \param writemask WRITEMASK_X/Y/Z/W bitfield
*/
static inline struct brw_reg
-brw_reg(unsigned file,
+brw_reg(enum brw_reg_file file,
unsigned nr,
unsigned subnr,
unsigned negate,
@@ -353,15 +353,12 @@ brw_reg(unsigned file,
reg.type = type;
reg.file = file;
- reg.nr = nr;
- reg.subnr = subnr * type_sz(type);
reg.negate = negate;
reg.abs = abs;
- reg.vstride = vstride;
- reg.width = width;
- reg.hstride = hstride;
reg.address_mode = BRW_ADDRESS_DIRECT;
reg.pad0 = 0;
+ reg.subnr = subnr * type_sz(type);
+ reg.nr = nr;
/* Could do better: If the reg is r5.3<0;1,0>, we probably want to
* set swizzle and writemask to W, as the lower bits of subnr will
@@ -369,16 +366,19 @@ brw_reg(unsigned file,
* keep track of as you'd want it adjusted by suboffset(), etc.
* Perhaps fix up when converting to align16?
*/
- reg.dw1.bits.swizzle = swizzle;
- reg.dw1.bits.writemask = writemask;
- reg.dw1.bits.indirect_offset = 0;
- reg.dw1.bits.pad1 = 0;
+ reg.swizzle = swizzle;
+ reg.writemask = writemask;
+ reg.indirect_offset = 0;
+ reg.vstride = vstride;
+ reg.width = width;
+ reg.hstride = hstride;
+ reg.pad1 = 0;
return reg;
}
/** Construct float[16] register */
static inline struct brw_reg
-brw_vec16_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_vec16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
{
return brw_reg(file,
nr,
@@ -395,7 +395,7 @@ brw_vec16_reg(unsigned file, unsigned nr, unsigned subnr)
/** Construct float[8] register */
static inline struct brw_reg
-brw_vec8_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_vec8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
{
return brw_reg(file,
nr,
@@ -412,7 +412,7 @@ brw_vec8_reg(unsigned file, unsigned nr, unsigned subnr)
/** Construct float[4] register */
static inline struct brw_reg
-brw_vec4_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_vec4_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
{
return brw_reg(file,
nr,
@@ -429,7 +429,7 @@ brw_vec4_reg(unsigned file, unsigned nr, unsigned subnr)
/** Construct float[2] register */
static inline struct brw_reg
-brw_vec2_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_vec2_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
{
return brw_reg(file,
nr,
@@ -446,7 +446,7 @@ brw_vec2_reg(unsigned file, unsigned nr, unsigned subnr)
/** Construct float[1] register */
static inline struct brw_reg
-brw_vec1_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_vec1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
{
return brw_reg(file,
nr,
@@ -462,7 +462,8 @@ brw_vec1_reg(unsigned file, unsigned nr, unsigned subnr)
}
static inline struct brw_reg
-brw_vecn_reg(unsigned width, unsigned file, unsigned nr, unsigned subnr)
+brw_vecn_reg(unsigned width, enum brw_reg_file file,
+ unsigned nr, unsigned subnr)
{
switch (width) {
case 1:
@@ -529,21 +530,21 @@ byte_offset(struct brw_reg reg, unsigned bytes)
/** Construct unsigned word[16] register */
static inline struct brw_reg
-brw_uw16_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_uw16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
{
return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
/** Construct unsigned word[8] register */
static inline struct brw_reg
-brw_uw8_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_uw8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
{
return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
/** Construct unsigned word[1] register */
static inline struct brw_reg
-brw_uw1_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
{
return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
@@ -569,7 +570,7 @@ static inline struct brw_reg
brw_imm_f(float f)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
- imm.dw1.f = f;
+ imm.f = f;
return imm;
}
@@ -578,7 +579,7 @@ static inline struct brw_reg
brw_imm_d(int d)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
- imm.dw1.d = d;
+ imm.d = d;
return imm;
}
@@ -587,7 +588,7 @@ static inline struct brw_reg
brw_imm_ud(unsigned ud)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
- imm.dw1.ud = ud;
+ imm.ud = ud;
return imm;
}
@@ -596,7 +597,7 @@ static inline struct brw_reg
brw_imm_uw(uint16_t uw)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
- imm.dw1.ud = uw | (uw << 16);
+ imm.ud = uw | (uw << 16);
return imm;
}
@@ -605,7 +606,7 @@ static inline struct brw_reg
brw_imm_w(int16_t w)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
- imm.dw1.d = w | (w << 16);
+ imm.d = w | (w << 16);
return imm;
}
@@ -621,7 +622,7 @@ brw_imm_v(unsigned v)
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_8;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
- imm.dw1.ud = v;
+ imm.ud = v;
return imm;
}
@@ -633,7 +634,7 @@ brw_imm_vf(unsigned v)
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_4;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
- imm.dw1.ud = v;
+ imm.ud = v;
return imm;
}
@@ -923,8 +924,8 @@ brw_swizzle(struct brw_reg reg, unsigned x, unsigned y, unsigned z, unsigned w)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
- reg.dw1.bits.swizzle = brw_compose_swizzle(BRW_SWIZZLE4(x, y, z, w),
- reg.dw1.bits.swizzle);
+ reg.swizzle = brw_compose_swizzle(BRW_SWIZZLE4(x, y, z, w),
+ reg.swizzle);
return reg;
}
@@ -939,7 +940,7 @@ static inline struct brw_reg
brw_writemask(struct brw_reg reg, unsigned mask)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
- reg.dw1.bits.writemask &= mask;
+ reg.writemask &= mask;
return reg;
}
@@ -947,7 +948,7 @@ static inline struct brw_reg
brw_set_writemask(struct brw_reg reg, unsigned mask)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
- reg.dw1.bits.writemask = mask;
+ reg.writemask = mask;
return reg;
}
@@ -980,7 +981,7 @@ brw_vec4_indirect(unsigned subnr, int offset)
struct brw_reg reg = brw_vec4_grf(0, 0);
reg.subnr = subnr;
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
- reg.dw1.bits.indirect_offset = offset;
+ reg.indirect_offset = offset;
return reg;
}
@@ -990,7 +991,18 @@ brw_vec1_indirect(unsigned subnr, int offset)
struct brw_reg reg = brw_vec1_grf(0, 0);
reg.subnr = subnr;
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
- reg.dw1.bits.indirect_offset = offset;
+ reg.indirect_offset = offset;
+ return reg;
+}
+
+static inline struct brw_reg
+brw_VxH_indirect(unsigned subnr, int offset)
+{
+ struct brw_reg reg = brw_vec1_grf(0, 0);
+ reg.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
+ reg.subnr = subnr;
+ reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ reg.indirect_offset = offset;
return reg;
}
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 88c45f74333..776f75d3e58 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -583,15 +583,14 @@ fs_instruction_scheduler::count_reads_remaining(backend_instruction *be)
if (is_src_duplicate(inst, i))
continue;
- if (inst->src[i].file == GRF) {
- reads_remaining[inst->src[i].reg]++;
- } else if (inst->src[i].file == HW_REG &&
- inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
- if (inst->src[i].fixed_hw_reg.nr >= hw_reg_count)
+ if (inst->src[i].file == VGRF) {
+ reads_remaining[inst->src[i].nr]++;
+ } else if (inst->src[i].file == FIXED_GRF) {
+ if (inst->src[i].nr >= hw_reg_count)
continue;
for (int j = 0; j < inst->regs_read(i); j++)
- hw_reads_remaining[inst->src[i].fixed_hw_reg.nr + j]++;
+ hw_reads_remaining[inst->src[i].nr + j]++;
}
}
}
@@ -660,21 +659,20 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
if (!reads_remaining)
return;
- if (inst->dst.file == GRF) {
- written[inst->dst.reg] = true;
+ if (inst->dst.file == VGRF) {
+ written[inst->dst.nr] = true;
}
for (int i = 0; i < inst->sources; i++) {
if (is_src_duplicate(inst, i))
continue;
- if (inst->src[i].file == GRF) {
- reads_remaining[inst->src[i].reg]--;
- } else if (inst->src[i].file == HW_REG &&
- inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE &&
- inst->src[i].fixed_hw_reg.nr < hw_reg_count) {
+ if (inst->src[i].file == VGRF) {
+ reads_remaining[inst->src[i].nr]--;
+ } else if (inst->src[i].file == FIXED_GRF &&
+ inst->src[i].nr < hw_reg_count) {
for (int off = 0; off < inst->regs_read(i); off++)
- hw_reads_remaining[inst->src[i].fixed_hw_reg.nr + off]--;
+ hw_reads_remaining[inst->src[i].nr + off]--;
}
}
}
@@ -685,26 +683,25 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
fs_inst *inst = (fs_inst *)be;
int benefit = 0;
- if (inst->dst.file == GRF) {
- if (!BITSET_TEST(livein[block_idx], inst->dst.reg) &&
- !written[inst->dst.reg])
- benefit -= v->alloc.sizes[inst->dst.reg];
+ if (inst->dst.file == VGRF) {
+ if (!BITSET_TEST(livein[block_idx], inst->dst.nr) &&
+ !written[inst->dst.nr])
+ benefit -= v->alloc.sizes[inst->dst.nr];
}
for (int i = 0; i < inst->sources; i++) {
if (is_src_duplicate(inst, i))
continue;
- if (inst->src[i].file == GRF &&
- !BITSET_TEST(liveout[block_idx], inst->src[i].reg) &&
- reads_remaining[inst->src[i].reg] == 1)
- benefit += v->alloc.sizes[inst->src[i].reg];
+ if (inst->src[i].file == VGRF &&
+ !BITSET_TEST(liveout[block_idx], inst->src[i].nr) &&
+ reads_remaining[inst->src[i].nr] == 1)
+ benefit += v->alloc.sizes[inst->src[i].nr];
- if (inst->src[i].file == HW_REG &&
- inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE &&
- inst->src[i].fixed_hw_reg.nr < hw_reg_count) {
+ if (inst->src[i].file == FIXED_GRF &&
+ inst->src[i].nr < hw_reg_count) {
for (int off = 0; off < inst->regs_read(i); off++) {
- int reg = inst->src[i].fixed_hw_reg.nr + off;
+ int reg = inst->src[i].nr + off;
if (!BITSET_TEST(hw_liveout[block_idx], reg) &&
hw_reads_remaining[reg] == 1) {
benefit++;
@@ -927,7 +924,6 @@ fs_instruction_scheduler::calculate_deps()
* granular level.
*/
schedule_node *last_fixed_grf_write = NULL;
- int reg_width = v->dispatch_width / 8;
/* The last instruction always needs to still be the last
* instruction. Either it's flow control (IF, ELSE, ENDIF, DO,
@@ -951,24 +947,19 @@ fs_instruction_scheduler::calculate_deps()
/* read-after-write deps. */
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF) {
+ if (inst->src[i].file == VGRF) {
if (post_reg_alloc) {
for (int r = 0; r < inst->regs_read(i); r++)
- add_dep(last_grf_write[inst->src[i].reg + r], n);
+ add_dep(last_grf_write[inst->src[i].nr + r], n);
} else {
for (int r = 0; r < inst->regs_read(i); r++) {
- add_dep(last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r], n);
+ add_dep(last_grf_write[inst->src[i].nr * 16 + inst->src[i].reg_offset + r], n);
}
}
- } else if (inst->src[i].file == HW_REG &&
- (inst->src[i].fixed_hw_reg.file ==
- BRW_GENERAL_REGISTER_FILE)) {
+ } else if (inst->src[i].file == FIXED_GRF) {
if (post_reg_alloc) {
- int size = reg_width;
- if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0)
- size = 1;
- for (int r = 0; r < size; r++)
- add_dep(last_grf_write[inst->src[i].fixed_hw_reg.nr + r], n);
+ for (int r = 0; r < inst->regs_read(i); r++)
+ add_dep(last_grf_write[inst->src[i].nr + r], n);
} else {
add_dep(last_fixed_grf_write, n);
}
@@ -976,9 +967,7 @@ fs_instruction_scheduler::calculate_deps()
add_dep(last_accumulator_write, n);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
- inst->src[i].file != UNIFORM &&
- (inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
+ inst->src[i].file != UNIFORM) {
assert(inst->src[i].file != MRF);
add_barrier_deps(n);
}
@@ -1003,36 +992,35 @@ fs_instruction_scheduler::calculate_deps()
}
/* write-after-write deps. */
- if (inst->dst.file == GRF) {
+ if (inst->dst.file == VGRF) {
if (post_reg_alloc) {
for (int r = 0; r < inst->regs_written; r++) {
- add_dep(last_grf_write[inst->dst.reg + r], n);
- last_grf_write[inst->dst.reg + r] = n;
+ add_dep(last_grf_write[inst->dst.nr + r], n);
+ last_grf_write[inst->dst.nr + r] = n;
}
} else {
for (int r = 0; r < inst->regs_written; r++) {
- add_dep(last_grf_write[inst->dst.reg * 16 + inst->dst.reg_offset + r], n);
- last_grf_write[inst->dst.reg * 16 + inst->dst.reg_offset + r] = n;
+ add_dep(last_grf_write[inst->dst.nr * 16 + inst->dst.reg_offset + r], n);
+ last_grf_write[inst->dst.nr * 16 + inst->dst.reg_offset + r] = n;
}
}
} else if (inst->dst.file == MRF) {
- int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
+ int reg = inst->dst.nr & ~BRW_MRF_COMPR4;
add_dep(last_mrf_write[reg], n);
last_mrf_write[reg] = n;
if (is_compressed(inst)) {
- if (inst->dst.reg & BRW_MRF_COMPR4)
+ if (inst->dst.nr & BRW_MRF_COMPR4)
reg += 4;
else
reg++;
add_dep(last_mrf_write[reg], n);
last_mrf_write[reg] = n;
}
- } else if (inst->dst.file == HW_REG &&
- inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+ } else if (inst->dst.file == FIXED_GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < reg_width; r++)
- last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
+ for (int r = 0; r < inst->regs_written; r++)
+ last_grf_write[inst->dst.nr + r] = n;
} else {
last_fixed_grf_write = n;
}
@@ -1080,24 +1068,19 @@ fs_instruction_scheduler::calculate_deps()
/* write-after-read deps. */
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF) {
+ if (inst->src[i].file == VGRF) {
if (post_reg_alloc) {
for (int r = 0; r < inst->regs_read(i); r++)
- add_dep(n, last_grf_write[inst->src[i].reg + r], 0);
+ add_dep(n, last_grf_write[inst->src[i].nr + r], 0);
} else {
for (int r = 0; r < inst->regs_read(i); r++) {
- add_dep(n, last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r], 0);
+ add_dep(n, last_grf_write[inst->src[i].nr * 16 + inst->src[i].reg_offset + r], 0);
}
}
- } else if (inst->src[i].file == HW_REG &&
- (inst->src[i].fixed_hw_reg.file ==
- BRW_GENERAL_REGISTER_FILE)) {
+ } else if (inst->src[i].file == FIXED_GRF) {
if (post_reg_alloc) {
- int size = reg_width;
- if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0)
- size = 1;
- for (int r = 0; r < size; r++)
- add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r], 0);
+ for (int r = 0; r < inst->regs_read(i); r++)
+ add_dep(n, last_grf_write[inst->src[i].nr + r], 0);
} else {
add_dep(n, last_fixed_grf_write, 0);
}
@@ -1105,9 +1088,7 @@ fs_instruction_scheduler::calculate_deps()
add_dep(n, last_accumulator_write, 0);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
- inst->src[i].file != UNIFORM &&
- (inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
+ inst->src[i].file != UNIFORM) {
assert(inst->src[i].file != MRF);
add_barrier_deps(n);
}
@@ -1134,33 +1115,32 @@ fs_instruction_scheduler::calculate_deps()
/* Update the things this instruction wrote, so earlier reads
* can mark this as WAR dependency.
*/
- if (inst->dst.file == GRF) {
+ if (inst->dst.file == VGRF) {
if (post_reg_alloc) {
for (int r = 0; r < inst->regs_written; r++)
- last_grf_write[inst->dst.reg + r] = n;
+ last_grf_write[inst->dst.nr + r] = n;
} else {
for (int r = 0; r < inst->regs_written; r++) {
- last_grf_write[inst->dst.reg * 16 + inst->dst.reg_offset + r] = n;
+ last_grf_write[inst->dst.nr * 16 + inst->dst.reg_offset + r] = n;
}
}
} else if (inst->dst.file == MRF) {
- int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
+ int reg = inst->dst.nr & ~BRW_MRF_COMPR4;
last_mrf_write[reg] = n;
if (is_compressed(inst)) {
- if (inst->dst.reg & BRW_MRF_COMPR4)
+ if (inst->dst.nr & BRW_MRF_COMPR4)
reg += 4;
else
reg++;
last_mrf_write[reg] = n;
}
- } else if (inst->dst.file == HW_REG &&
- inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+ } else if (inst->dst.file == FIXED_GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < reg_width; r++)
- last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
+ for (int r = 0; r < inst->regs_written; r++)
+ last_grf_write[inst->dst.nr + r] = n;
} else {
last_fixed_grf_write = n;
}
@@ -1222,21 +1202,17 @@ vec4_instruction_scheduler::calculate_deps()
/* read-after-write deps. */
for (int i = 0; i < 3; i++) {
- if (inst->src[i].file == GRF) {
+ if (inst->src[i].file == VGRF) {
for (unsigned j = 0; j < inst->regs_read(i); ++j)
- add_dep(last_grf_write[inst->src[i].reg + j], n);
- } else if (inst->src[i].file == HW_REG &&
- (inst->src[i].fixed_hw_reg.file ==
- BRW_GENERAL_REGISTER_FILE)) {
+ add_dep(last_grf_write[inst->src[i].nr + j], n);
+ } else if (inst->src[i].file == FIXED_GRF) {
add_dep(last_fixed_grf_write, n);
} else if (inst->src[i].is_accumulator()) {
assert(last_accumulator_write);
add_dep(last_accumulator_write, n);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
- inst->src[i].file != UNIFORM &&
- (inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
+ inst->src[i].file != UNIFORM) {
/* No reads from MRF, and ATTR is already translated away */
assert(inst->src[i].file != MRF &&
inst->src[i].file != ATTR);
@@ -1265,16 +1241,15 @@ vec4_instruction_scheduler::calculate_deps()
}
/* write-after-write deps. */
- if (inst->dst.file == GRF) {
+ if (inst->dst.file == VGRF) {
for (unsigned j = 0; j < inst->regs_written; ++j) {
- add_dep(last_grf_write[inst->dst.reg + j], n);
- last_grf_write[inst->dst.reg + j] = n;
+ add_dep(last_grf_write[inst->dst.nr + j], n);
+ last_grf_write[inst->dst.nr + j] = n;
}
} else if (inst->dst.file == MRF) {
- add_dep(last_mrf_write[inst->dst.reg], n);
- last_mrf_write[inst->dst.reg] = n;
- } else if (inst->dst.file == HW_REG &&
- inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+ add_dep(last_mrf_write[inst->dst.nr], n);
+ last_mrf_write[inst->dst.nr] = n;
+ } else if (inst->dst.file == FIXED_GRF) {
last_fixed_grf_write = n;
} else if (inst->dst.is_accumulator()) {
add_dep(last_accumulator_write, n);
@@ -1320,20 +1295,16 @@ vec4_instruction_scheduler::calculate_deps()
/* write-after-read deps. */
for (int i = 0; i < 3; i++) {
- if (inst->src[i].file == GRF) {
+ if (inst->src[i].file == VGRF) {
for (unsigned j = 0; j < inst->regs_read(i); ++j)
- add_dep(n, last_grf_write[inst->src[i].reg + j]);
- } else if (inst->src[i].file == HW_REG &&
- (inst->src[i].fixed_hw_reg.file ==
- BRW_GENERAL_REGISTER_FILE)) {
+ add_dep(n, last_grf_write[inst->src[i].nr + j]);
+ } else if (inst->src[i].file == FIXED_GRF) {
add_dep(n, last_fixed_grf_write);
} else if (inst->src[i].is_accumulator()) {
add_dep(n, last_accumulator_write);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
- inst->src[i].file != UNIFORM &&
- (inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
+ inst->src[i].file != UNIFORM) {
assert(inst->src[i].file != MRF &&
inst->src[i].file != ATTR);
add_barrier_deps(n);
@@ -1361,13 +1332,12 @@ vec4_instruction_scheduler::calculate_deps()
/* Update the things this instruction wrote, so earlier reads
* can mark this as WAR dependency.
*/
- if (inst->dst.file == GRF) {
+ if (inst->dst.file == VGRF) {
for (unsigned j = 0; j < inst->regs_written; ++j)
- last_grf_write[inst->dst.reg + j] = n;
+ last_grf_write[inst->dst.nr + j] = n;
} else if (inst->dst.file == MRF) {
- last_mrf_write[inst->dst.reg] = n;
- } else if (inst->dst.file == HW_REG &&
- inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+ last_mrf_write[inst->dst.nr] = n;
+ } else if (inst->dst.file == FIXED_GRF) {
last_fixed_grf_write = n;
} else if (inst->dst.is_accumulator()) {
last_accumulator_write = n;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 063cb84a958..1f3ae7ab5e6 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -150,6 +150,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
compiler->glsl_compiler_options[i].NirOptions = nir_options;
+
+ compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
}
return compiler;
@@ -291,7 +293,7 @@ const char *
brw_instruction_name(enum opcode op)
{
switch (op) {
- case BRW_OPCODE_MOV ... BRW_OPCODE_NOP:
+ case BRW_OPCODE_ILLEGAL ... BRW_OPCODE_NOP:
assert(opcode_descs[op].name);
return opcode_descs[op].name;
case FS_OPCODE_FB_WRITE:
@@ -354,6 +356,10 @@ brw_instruction_name(enum opcode op)
return "txf_cms";
case SHADER_OPCODE_TXF_CMS_LOGICAL:
return "txf_cms_logical";
+ case SHADER_OPCODE_TXF_CMS_W:
+ return "txf_cms_w";
+ case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
+ return "txf_cms_w_logical";
case SHADER_OPCODE_TXF_UMS:
return "txf_ums";
case SHADER_OPCODE_TXF_UMS_LOGICAL:
@@ -426,6 +432,8 @@ brw_instruction_name(enum opcode op)
return "gen8_urb_write_simd8_masked_per_slot";
case SHADER_OPCODE_URB_READ_SIMD8:
return "urb_read_simd8";
+ case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
+ return "urb_read_simd8_per_slot";
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
return "find_live_channel";
@@ -561,7 +569,7 @@ brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
unsigned ud;
int d;
float f;
- } imm = { reg->dw1.ud }, sat_imm = { 0 };
+ } imm = { reg->ud }, sat_imm = { 0 };
switch (type) {
case BRW_REGISTER_TYPE_UD:
@@ -592,7 +600,7 @@ brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
}
if (imm.ud != sat_imm.ud) {
- reg->dw1.ud = sat_imm.ud;
+ reg->ud = sat_imm.ud;
return true;
}
return false;
@@ -604,17 +612,17 @@ brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
switch (type) {
case BRW_REGISTER_TYPE_D:
case BRW_REGISTER_TYPE_UD:
- reg->dw1.d = -reg->dw1.d;
+ reg->d = -reg->d;
return true;
case BRW_REGISTER_TYPE_W:
case BRW_REGISTER_TYPE_UW:
- reg->dw1.d = -(int16_t)reg->dw1.ud;
+ reg->d = -(int16_t)reg->ud;
return true;
case BRW_REGISTER_TYPE_F:
- reg->dw1.f = -reg->dw1.f;
+ reg->f = -reg->f;
return true;
case BRW_REGISTER_TYPE_VF:
- reg->dw1.ud ^= 0x80808080;
+ reg->ud ^= 0x80808080;
return true;
case BRW_REGISTER_TYPE_UB:
case BRW_REGISTER_TYPE_B:
@@ -638,16 +646,16 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
{
switch (type) {
case BRW_REGISTER_TYPE_D:
- reg->dw1.d = abs(reg->dw1.d);
+ reg->d = abs(reg->d);
return true;
case BRW_REGISTER_TYPE_W:
- reg->dw1.d = abs((int16_t)reg->dw1.ud);
+ reg->d = abs((int16_t)reg->ud);
return true;
case BRW_REGISTER_TYPE_F:
- reg->dw1.f = fabsf(reg->dw1.f);
+ reg->f = fabsf(reg->f);
return true;
case BRW_REGISTER_TYPE_VF:
- reg->dw1.ud &= ~0x80808080;
+ reg->ud &= ~0x80808080;
return true;
case BRW_REGISTER_TYPE_UB:
case BRW_REGISTER_TYPE_B:
@@ -697,7 +705,7 @@ backend_reg::is_zero() const
if (file != IMM)
return false;
- return fixed_hw_reg.dw1.d == 0;
+ return d == 0;
}
bool
@@ -707,8 +715,8 @@ backend_reg::is_one() const
return false;
return type == BRW_REGISTER_TYPE_F
- ? fixed_hw_reg.dw1.f == 1.0
- : fixed_hw_reg.dw1.d == 1;
+ ? f == 1.0
+ : d == 1;
}
bool
@@ -719,9 +727,9 @@ backend_reg::is_negative_one() const
switch (type) {
case BRW_REGISTER_TYPE_F:
- return fixed_hw_reg.dw1.f == -1.0;
+ return f == -1.0;
case BRW_REGISTER_TYPE_D:
- return fixed_hw_reg.dw1.d == -1;
+ return d == -1;
default:
return false;
}
@@ -730,25 +738,21 @@ backend_reg::is_negative_one() const
bool
backend_reg::is_null() const
{
- return file == HW_REG &&
- fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
- fixed_hw_reg.nr == BRW_ARF_NULL;
+ return file == ARF && nr == BRW_ARF_NULL;
}
bool
backend_reg::is_accumulator() const
{
- return file == HW_REG &&
- fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
- fixed_hw_reg.nr == BRW_ARF_ACCUMULATOR;
+ return file == ARF && nr == BRW_ARF_ACCUMULATOR;
}
bool
backend_reg::in_range(const backend_reg &r, unsigned n) const
{
return (file == r.file &&
- reg == r.reg &&
+ nr == r.nr &&
reg_offset >= r.reg_offset &&
reg_offset < r.reg_offset + n);
}
@@ -779,7 +783,7 @@ backend_instruction::is_commutative() const
bool
backend_instruction::is_3src() const
{
- return opcode < ARRAY_SIZE(opcode_descs) && opcode_descs[opcode].nsrc == 3;
+ return ::is_3src(opcode);
}
bool
@@ -790,6 +794,7 @@ backend_instruction::is_tex() const
opcode == SHADER_OPCODE_TXD ||
opcode == SHADER_OPCODE_TXF ||
opcode == SHADER_OPCODE_TXF_CMS ||
+ opcode == SHADER_OPCODE_TXF_CMS_W ||
opcode == SHADER_OPCODE_TXF_UMS ||
opcode == SHADER_OPCODE_TXF_MCS ||
opcode == SHADER_OPCODE_TXL ||
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index f4647cca4f9..c4a37187ce2 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -38,38 +38,18 @@
#define MAX_SAMPLER_MESSAGE_SIZE 11
#define MAX_VGRF_SIZE 16
-enum PACKED register_file {
- BAD_FILE,
- GRF,
- MRF,
- IMM,
- HW_REG, /* a struct brw_reg */
- ATTR,
- UNIFORM, /* prog_data->params[reg] */
-};
-
-struct backend_reg
-{
#ifdef __cplusplus
+struct backend_reg : public brw_reg
+{
+ backend_reg() {}
+ backend_reg(struct brw_reg reg) : brw_reg(reg) {}
+
bool is_zero() const;
bool is_one() const;
bool is_negative_one() const;
bool is_null() const;
bool is_accumulator() const;
bool in_range(const backend_reg &r, unsigned n) const;
-#endif
-
- enum register_file file; /**< Register file: GRF, MRF, IMM. */
- enum brw_reg_type type; /**< Register type: BRW_REGISTER_TYPE_* */
-
- /**
- * Register number.
- *
- * For GRF, it's a virtual register number until register allocation.
- *
- * For MRF, it's the hardware register.
- */
- uint16_t reg;
/**
* Offset within the virtual register.
@@ -81,12 +61,8 @@ struct backend_reg
* For uniforms, this is in units of 1 float.
*/
uint16_t reg_offset;
-
- struct brw_reg fixed_hw_reg;
-
- bool negate;
- bool abs;
};
+#endif
struct cfg_t;
struct bblock_t;
@@ -274,6 +250,7 @@ bool brw_cs_precompile(struct gl_context *ctx,
int type_size_scalar(const struct glsl_type *type);
int type_size_vec4(const struct glsl_type *type);
+int type_size_vec4_times_4(const struct glsl_type *type);
bool is_scalar_shader_stage(const struct brw_compiler *compiler, int stage);
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 2aa1248fea6..94734bae621 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -172,7 +172,6 @@ brw_state_dirty(struct brw_context *brw, GLuint mesa_flags, uint64_t brw_flags)
/* brw_binding_tables.c */
void brw_upload_binding_table(struct brw_context *brw,
uint32_t packet_name,
- GLbitfield brw_new_binding_table,
const struct brw_stage_prog_data *prog_data,
struct brw_stage_state *stage_state);
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 0344b8a7fb0..6f8daf6d4d2 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -589,9 +589,7 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_CONTEXT),
DEFINE_BIT(BRW_NEW_PSP),
DEFINE_BIT(BRW_NEW_SURFACES),
- DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
- DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
- DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
+ DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
DEFINE_BIT(BRW_NEW_INDICES),
DEFINE_BIT(BRW_NEW_VERTICES),
DEFINE_BIT(BRW_NEW_BATCH),
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 01eb1580953..a086b43e11a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -51,12 +51,12 @@ src_reg::init()
this->file = BAD_FILE;
}
-src_reg::src_reg(register_file file, int reg, const glsl_type *type)
+src_reg::src_reg(enum brw_reg_file file, int nr, const glsl_type *type)
{
init();
this->file = file;
- this->reg = reg;
+ this->nr = nr;
if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
this->swizzle = brw_swizzle_for_size(type->vector_elements);
else
@@ -77,7 +77,7 @@ src_reg::src_reg(float f)
this->file = IMM;
this->type = BRW_REGISTER_TYPE_F;
- this->fixed_hw_reg.dw1.f = f;
+ this->f = f;
}
src_reg::src_reg(uint32_t u)
@@ -86,7 +86,7 @@ src_reg::src_reg(uint32_t u)
this->file = IMM;
this->type = BRW_REGISTER_TYPE_UD;
- this->fixed_hw_reg.dw1.ud = u;
+ this->ud = u;
}
src_reg::src_reg(int32_t i)
@@ -95,7 +95,7 @@ src_reg::src_reg(int32_t i)
this->file = IMM;
this->type = BRW_REGISTER_TYPE_D;
- this->fixed_hw_reg.dw1.d = i;
+ this->d = i;
}
src_reg::src_reg(uint8_t vf[4])
@@ -104,7 +104,7 @@ src_reg::src_reg(uint8_t vf[4])
this->file = IMM;
this->type = BRW_REGISTER_TYPE_VF;
- memcpy(&this->fixed_hw_reg.dw1.ud, vf, sizeof(unsigned));
+ memcpy(&this->ud, vf, sizeof(unsigned));
}
src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
@@ -113,31 +113,21 @@ src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
this->file = IMM;
this->type = BRW_REGISTER_TYPE_VF;
- this->fixed_hw_reg.dw1.ud = (vf0 << 0) |
- (vf1 << 8) |
- (vf2 << 16) |
- (vf3 << 24);
+ this->ud = (vf0 << 0) | (vf1 << 8) | (vf2 << 16) | (vf3 << 24);
}
-src_reg::src_reg(struct brw_reg reg)
+src_reg::src_reg(struct brw_reg reg) :
+ backend_reg(reg)
{
- init();
-
- this->file = HW_REG;
- this->fixed_hw_reg = reg;
- this->type = reg.type;
+ this->reg_offset = 0;
+ this->reladdr = NULL;
}
-src_reg::src_reg(const dst_reg &reg)
+src_reg::src_reg(const dst_reg &reg) :
+ backend_reg(static_cast<struct brw_reg>(reg))
{
- init();
-
- this->file = reg.file;
- this->reg = reg.reg;
this->reg_offset = reg.reg_offset;
- this->type = reg.type;
this->reladdr = reg.reladdr;
- this->fixed_hw_reg = reg.fixed_hw_reg;
this->swizzle = brw_swizzle_for_mask(reg.writemask);
}
@@ -154,73 +144,58 @@ dst_reg::dst_reg()
init();
}
-dst_reg::dst_reg(register_file file, int reg)
+dst_reg::dst_reg(enum brw_reg_file file, int nr)
{
init();
this->file = file;
- this->reg = reg;
+ this->nr = nr;
}
-dst_reg::dst_reg(register_file file, int reg, const glsl_type *type,
+dst_reg::dst_reg(enum brw_reg_file file, int nr, const glsl_type *type,
unsigned writemask)
{
init();
this->file = file;
- this->reg = reg;
+ this->nr = nr;
this->type = brw_type_for_base_type(type);
this->writemask = writemask;
}
-dst_reg::dst_reg(register_file file, int reg, brw_reg_type type,
+dst_reg::dst_reg(enum brw_reg_file file, int nr, brw_reg_type type,
unsigned writemask)
{
init();
this->file = file;
- this->reg = reg;
+ this->nr = nr;
this->type = type;
this->writemask = writemask;
}
-dst_reg::dst_reg(struct brw_reg reg)
+dst_reg::dst_reg(struct brw_reg reg) :
+ backend_reg(reg)
{
- init();
-
- this->file = HW_REG;
- this->fixed_hw_reg = reg;
- this->type = reg.type;
+ this->reg_offset = 0;
+ this->reladdr = NULL;
}
-dst_reg::dst_reg(const src_reg &reg)
+dst_reg::dst_reg(const src_reg &reg) :
+ backend_reg(static_cast<struct brw_reg>(reg))
{
- init();
-
- this->file = reg.file;
- this->reg = reg.reg;
this->reg_offset = reg.reg_offset;
- this->type = reg.type;
this->writemask = brw_mask_for_swizzle(reg.swizzle);
this->reladdr = reg.reladdr;
- this->fixed_hw_reg = reg.fixed_hw_reg;
}
bool
dst_reg::equals(const dst_reg &r) const
{
- return (file == r.file &&
- reg == r.reg &&
+ return (memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0 &&
reg_offset == r.reg_offset &&
- type == r.type &&
- negate == r.negate &&
- abs == r.abs &&
- writemask == r.writemask &&
(reladdr == r.reladdr ||
- (reladdr && r.reladdr && reladdr->equals(*r.reladdr))) &&
- ((file != HW_REG && file != IMM) ||
- memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
- sizeof(fixed_hw_reg)) == 0));
+ (reladdr && r.reladdr && reladdr->equals(*r.reladdr))));
}
bool
@@ -339,6 +314,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_CMS:
+ case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_TG4:
@@ -354,16 +330,9 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
bool
src_reg::equals(const src_reg &r) const
{
- return (file == r.file &&
- reg == r.reg &&
+ return (memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0 &&
reg_offset == r.reg_offset &&
- type == r.type &&
- negate == r.negate &&
- abs == r.abs &&
- swizzle == r.swizzle &&
- !reladdr && !r.reladdr &&
- memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
- sizeof(fixed_hw_reg)) == 0);
+ !reladdr && !r.reladdr);
}
bool
@@ -372,7 +341,7 @@ vec4_visitor::opt_vector_float()
bool progress = false;
int last_reg = -1, last_reg_offset = -1;
- enum register_file last_reg_file = BAD_FILE;
+ enum brw_reg_file last_reg_file = BAD_FILE;
int remaining_channels = 0;
uint8_t imm[4];
@@ -380,10 +349,10 @@ vec4_visitor::opt_vector_float()
vec4_instruction *imm_inst[4];
foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
- if (last_reg != inst->dst.reg ||
+ if (last_reg != inst->dst.nr ||
last_reg_offset != inst->dst.reg_offset ||
last_reg_file != inst->dst.file) {
- last_reg = inst->dst.reg;
+ last_reg = inst->dst.nr;
last_reg_offset = inst->dst.reg_offset;
last_reg_file = inst->dst.file;
remaining_channels = WRITEMASK_XYZW;
@@ -396,7 +365,7 @@ vec4_visitor::opt_vector_float()
inst->src[0].file != IMM)
continue;
- int vf = brw_float_to_vf(inst->src[0].fixed_hw_reg.dw1.f);
+ int vf = brw_float_to_vf(inst->src[0].f);
if (vf == -1)
continue;
@@ -451,7 +420,9 @@ vec4_visitor::opt_reduce_swizzle()
bool progress = false;
foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
- if (inst->dst.file == BAD_FILE || inst->dst.file == HW_REG ||
+ if (inst->dst.file == BAD_FILE ||
+ inst->dst.file == ARF ||
+ inst->dst.file == FIXED_GRF ||
inst->is_send_from_grf())
continue;
@@ -479,7 +450,7 @@ vec4_visitor::opt_reduce_swizzle()
/* Update sources' swizzles. */
for (int i = 0; i < 3; i++) {
- if (inst->src[i].file != GRF &&
+ if (inst->src[i].file != VGRF &&
inst->src[i].file != ATTR &&
inst->src[i].file != UNIFORM)
continue;
@@ -505,7 +476,7 @@ vec4_visitor::split_uniform_registers()
/* Prior to this, uniforms have been in an array sized according to
* the number of vector uniforms present, sparsely filled (so an
* aggregate results in reg indices being skipped over). Now we're
- * going to cut those aggregates up so each .reg index is one
+ * going to cut those aggregates up so each .nr index is one
* vector. The goal is to make elimination of unused uniform
* components easier later.
*/
@@ -516,7 +487,7 @@ vec4_visitor::split_uniform_registers()
assert(!inst->src[i].reladdr);
- inst->src[i].reg += inst->src[i].reg_offset;
+ inst->src[i].nr += inst->src[i].reg_offset;
inst->src[i].reg_offset = 0;
}
}
@@ -565,7 +536,7 @@ vec4_visitor::pack_uniform_registers()
if (inst->src[i].file != UNIFORM)
continue;
- int reg = inst->src[i].reg;
+ int reg = inst->src[i].nr;
for (int c = 0; c < 4; c++) {
if (!(readmask & (1 << c)))
continue;
@@ -620,12 +591,12 @@ vec4_visitor::pack_uniform_registers()
/* Now, update the instructions for our repacked uniforms. */
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
for (int i = 0 ; i < 3; i++) {
- int src = inst->src[i].reg;
+ int src = inst->src[i].nr;
if (inst->src[i].file != UNIFORM)
continue;
- inst->src[i].reg = new_loc[src];
+ inst->src[i].nr = new_loc[src];
inst->src[i].swizzle += BRW_SWIZZLE4(new_chan[src], new_chan[src],
new_chan[src], new_chan[src]);
}
@@ -659,8 +630,7 @@ vec4_visitor::opt_algebraic()
if (inst->dst.type != inst->src[0].type)
assert(!"unimplemented: saturate mixed types");
- if (brw_saturate_immediate(inst->dst.type,
- &inst->src[0].fixed_hw_reg)) {
+ if (brw_saturate_immediate(inst->dst.type, &inst->src[0])) {
inst->saturate = false;
progress = true;
}
@@ -821,10 +791,10 @@ vec4_visitor::move_push_constants_to_pull_constants()
foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
for (int i = 0 ; i < 3; i++) {
if (inst->src[i].file != UNIFORM ||
- pull_constant_loc[inst->src[i].reg] == -1)
+ pull_constant_loc[inst->src[i].nr] == -1)
continue;
- int uniform = inst->src[i].reg;
+ int uniform = inst->src[i].nr;
dst_reg temp = dst_reg(this, glsl_type::vec4_type);
@@ -832,7 +802,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
pull_constant_loc[uniform]);
inst->src[i].file = temp.file;
- inst->src[i].reg = temp.reg;
+ inst->src[i].nr = temp.nr;
inst->src[i].reg_offset = temp.reg_offset;
inst->src[i].reladdr = NULL;
}
@@ -924,10 +894,10 @@ vec4_visitor::opt_set_dependency_control()
* on, don't do dependency control across the read.
*/
for (int i = 0; i < 3; i++) {
- int reg = inst->src[i].reg + inst->src[i].reg_offset;
- if (inst->src[i].file == GRF) {
+ int reg = inst->src[i].nr + inst->src[i].reg_offset;
+ if (inst->src[i].file == VGRF) {
last_grf_write[reg] = NULL;
- } else if (inst->src[i].file == HW_REG) {
+ } else if (inst->src[i].file == FIXED_GRF) {
memset(last_grf_write, 0, sizeof(last_grf_write));
break;
}
@@ -943,8 +913,8 @@ vec4_visitor::opt_set_dependency_control()
/* Now, see if we can do dependency control for this instruction
* against a previous one writing to its destination.
*/
- int reg = inst->dst.reg + inst->dst.reg_offset;
- if (inst->dst.file == GRF) {
+ int reg = inst->dst.nr + inst->dst.reg_offset;
+ if (inst->dst.file == VGRF || inst->dst.file == FIXED_GRF) {
if (last_grf_write[reg] &&
!(inst->dst.writemask & grf_channels_written[reg])) {
last_grf_write[reg]->no_dd_clear = true;
@@ -966,11 +936,6 @@ vec4_visitor::opt_set_dependency_control()
last_mrf_write[reg] = inst;
mrf_channels_written[reg] |= inst->dst.writemask;
- } else if (inst->dst.reg == HW_REG) {
- if (inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE)
- memset(last_grf_write, 0, sizeof(last_grf_write));
- if (inst->dst.fixed_hw_reg.file == BRW_MESSAGE_REGISTER_FILE)
- memset(last_mrf_write, 0, sizeof(last_mrf_write));
}
}
}
@@ -998,11 +963,8 @@ vec4_instruction::can_reswizzle(const struct brw_device_info *devinfo,
if (mlen > 0)
return false;
- /* We can't use swizzles on the accumulator and that's really the only
- * HW_REG we would care to reswizzle so just disallow them all.
- */
for (int i = 0; i < 3; i++) {
- if (src[i].file == HW_REG)
+ if (src[i].is_accumulator())
return false;
}
@@ -1058,16 +1020,16 @@ vec4_visitor::opt_register_coalesce()
next_ip++;
if (inst->opcode != BRW_OPCODE_MOV ||
- (inst->dst.file != GRF && inst->dst.file != MRF) ||
+ (inst->dst.file != VGRF && inst->dst.file != MRF) ||
inst->predicate ||
- inst->src[0].file != GRF ||
+ inst->src[0].file != VGRF ||
inst->dst.type != inst->src[0].type ||
inst->src[0].abs || inst->src[0].negate || inst->src[0].reladdr)
continue;
/* Remove no-op MOVs */
if (inst->dst.file == inst->src[0].file &&
- inst->dst.reg == inst->src[0].reg &&
+ inst->dst.nr == inst->src[0].nr &&
inst->dst.reg_offset == inst->src[0].reg_offset) {
bool is_nop_mov = true;
@@ -1123,7 +1085,7 @@ vec4_visitor::opt_register_coalesce()
if (devinfo->gen == 6) {
/* gen6 math instructions must have the destination be
- * GRF, so no compute-to-MRF for them.
+ * VGRF, so no compute-to-MRF for them.
*/
if (scan_inst->is_math()) {
break;
@@ -1188,8 +1150,8 @@ vec4_visitor::opt_register_coalesce()
* in the register instead.
*/
if (to_mrf && scan_inst->mlen > 0) {
- if (inst->dst.reg >= scan_inst->base_mrf &&
- inst->dst.reg < scan_inst->base_mrf + scan_inst->mlen) {
+ if (inst->dst.nr >= scan_inst->base_mrf &&
+ inst->dst.nr < scan_inst->base_mrf + scan_inst->mlen) {
break;
}
} else {
@@ -1211,13 +1173,13 @@ vec4_visitor::opt_register_coalesce()
*/
vec4_instruction *scan_inst = _scan_inst;
while (scan_inst != inst) {
- if (scan_inst->dst.file == GRF &&
- scan_inst->dst.reg == inst->src[0].reg &&
+ if (scan_inst->dst.file == VGRF &&
+ scan_inst->dst.nr == inst->src[0].nr &&
scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
scan_inst->reswizzle(inst->dst.writemask,
inst->src[0].swizzle);
scan_inst->dst.file = inst->dst.file;
- scan_inst->dst.reg = inst->dst.reg;
+ scan_inst->dst.nr = inst->dst.nr;
scan_inst->dst.reg_offset = inst->dst.reg_offset;
if (inst->saturate &&
inst->dst.type != scan_inst->dst.type) {
@@ -1314,12 +1276,12 @@ vec4_visitor::split_virtual_grfs()
* to split.
*/
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
- if (inst->dst.file == GRF && inst->regs_written > 1)
- split_grf[inst->dst.reg] = false;
+ if (inst->dst.file == VGRF && inst->regs_written > 1)
+ split_grf[inst->dst.nr] = false;
for (int i = 0; i < 3; i++) {
- if (inst->src[i].file == GRF && inst->regs_read(i) > 1)
- split_grf[inst->src[i].reg] = false;
+ if (inst->src[i].file == VGRF && inst->regs_read(i) > 1)
+ split_grf[inst->src[i].nr] = false;
}
}
@@ -1340,16 +1302,16 @@ vec4_visitor::split_virtual_grfs()
}
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
- if (inst->dst.file == GRF && split_grf[inst->dst.reg] &&
+ if (inst->dst.file == VGRF && split_grf[inst->dst.nr] &&
inst->dst.reg_offset != 0) {
- inst->dst.reg = (new_virtual_grf[inst->dst.reg] +
+ inst->dst.nr = (new_virtual_grf[inst->dst.nr] +
inst->dst.reg_offset - 1);
inst->dst.reg_offset = 0;
}
for (int i = 0; i < 3; i++) {
- if (inst->src[i].file == GRF && split_grf[inst->src[i].reg] &&
+ if (inst->src[i].file == VGRF && split_grf[inst->src[i].nr] &&
inst->src[i].reg_offset != 0) {
- inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] +
+ inst->src[i].nr = (new_virtual_grf[inst->src[i].nr] +
inst->src[i].reg_offset - 1);
inst->src[i].reg_offset = 0;
}
@@ -1391,38 +1353,35 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
fprintf(file, " ");
switch (inst->dst.file) {
- case GRF:
- fprintf(file, "vgrf%d.%d", inst->dst.reg, inst->dst.reg_offset);
+ case VGRF:
+ fprintf(file, "vgrf%d.%d", inst->dst.nr, inst->dst.reg_offset);
+ break;
+ case FIXED_GRF:
+ fprintf(file, "g%d", inst->dst.nr);
break;
case MRF:
- fprintf(file, "m%d", inst->dst.reg);
+ fprintf(file, "m%d", inst->dst.nr);
break;
- case HW_REG:
- if (inst->dst.fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) {
- switch (inst->dst.fixed_hw_reg.nr) {
- case BRW_ARF_NULL:
- fprintf(file, "null");
- break;
- case BRW_ARF_ADDRESS:
- fprintf(file, "a0.%d", inst->dst.fixed_hw_reg.subnr);
- break;
- case BRW_ARF_ACCUMULATOR:
- fprintf(file, "acc%d", inst->dst.fixed_hw_reg.subnr);
- break;
- case BRW_ARF_FLAG:
- fprintf(file, "f%d.%d", inst->dst.fixed_hw_reg.nr & 0xf,
- inst->dst.fixed_hw_reg.subnr);
- break;
- default:
- fprintf(file, "arf%d.%d", inst->dst.fixed_hw_reg.nr & 0xf,
- inst->dst.fixed_hw_reg.subnr);
- break;
- }
- } else {
- fprintf(file, "hw_reg%d", inst->dst.fixed_hw_reg.nr);
+ case ARF:
+ switch (inst->dst.nr) {
+ case BRW_ARF_NULL:
+ fprintf(file, "null");
+ break;
+ case BRW_ARF_ADDRESS:
+ fprintf(file, "a0.%d", inst->dst.subnr);
+ break;
+ case BRW_ARF_ACCUMULATOR:
+ fprintf(file, "acc%d", inst->dst.subnr);
+ break;
+ case BRW_ARF_FLAG:
+ fprintf(file, "f%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
+ break;
+ default:
+ fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
+ break;
}
- if (inst->dst.fixed_hw_reg.subnr)
- fprintf(file, "+%d", inst->dst.fixed_hw_reg.subnr);
+ if (inst->dst.subnr)
+ fprintf(file, "+%d", inst->dst.subnr);
break;
case BAD_FILE:
fprintf(file, "(null)");
@@ -1454,70 +1413,61 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
if (inst->src[i].abs)
fprintf(file, "|");
switch (inst->src[i].file) {
- case GRF:
- fprintf(file, "vgrf%d", inst->src[i].reg);
+ case VGRF:
+ fprintf(file, "vgrf%d", inst->src[i].nr);
+ break;
+ case FIXED_GRF:
+ fprintf(file, "g%d", inst->src[i].nr);
break;
case ATTR:
- fprintf(file, "attr%d", inst->src[i].reg);
+ fprintf(file, "attr%d", inst->src[i].nr);
break;
case UNIFORM:
- fprintf(file, "u%d", inst->src[i].reg);
+ fprintf(file, "u%d", inst->src[i].nr);
break;
case IMM:
switch (inst->src[i].type) {
case BRW_REGISTER_TYPE_F:
- fprintf(file, "%fF", inst->src[i].fixed_hw_reg.dw1.f);
+ fprintf(file, "%fF", inst->src[i].f);
break;
case BRW_REGISTER_TYPE_D:
- fprintf(file, "%dD", inst->src[i].fixed_hw_reg.dw1.d);
+ fprintf(file, "%dD", inst->src[i].d);
break;
case BRW_REGISTER_TYPE_UD:
- fprintf(file, "%uU", inst->src[i].fixed_hw_reg.dw1.ud);
+ fprintf(file, "%uU", inst->src[i].ud);
break;
case BRW_REGISTER_TYPE_VF:
fprintf(file, "[%-gF, %-gF, %-gF, %-gF]",
- brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 0) & 0xff),
- brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 8) & 0xff),
- brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 16) & 0xff),
- brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 24) & 0xff));
+ brw_vf_to_float((inst->src[i].ud >> 0) & 0xff),
+ brw_vf_to_float((inst->src[i].ud >> 8) & 0xff),
+ brw_vf_to_float((inst->src[i].ud >> 16) & 0xff),
+ brw_vf_to_float((inst->src[i].ud >> 24) & 0xff));
break;
default:
fprintf(file, "???");
break;
}
break;
- case HW_REG:
- if (inst->src[i].fixed_hw_reg.negate)
- fprintf(file, "-");
- if (inst->src[i].fixed_hw_reg.abs)
- fprintf(file, "|");
- if (inst->src[i].fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) {
- switch (inst->src[i].fixed_hw_reg.nr) {
- case BRW_ARF_NULL:
- fprintf(file, "null");
- break;
- case BRW_ARF_ADDRESS:
- fprintf(file, "a0.%d", inst->src[i].fixed_hw_reg.subnr);
- break;
- case BRW_ARF_ACCUMULATOR:
- fprintf(file, "acc%d", inst->src[i].fixed_hw_reg.subnr);
- break;
- case BRW_ARF_FLAG:
- fprintf(file, "f%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf,
- inst->src[i].fixed_hw_reg.subnr);
- break;
- default:
- fprintf(file, "arf%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf,
- inst->src[i].fixed_hw_reg.subnr);
- break;
- }
- } else {
- fprintf(file, "hw_reg%d", inst->src[i].fixed_hw_reg.nr);
+ case ARF:
+ switch (inst->src[i].nr) {
+ case BRW_ARF_NULL:
+ fprintf(file, "null");
+ break;
+ case BRW_ARF_ADDRESS:
+ fprintf(file, "a0.%d", inst->src[i].subnr);
+ break;
+ case BRW_ARF_ACCUMULATOR:
+ fprintf(file, "acc%d", inst->src[i].subnr);
+ break;
+ case BRW_ARF_FLAG:
+ fprintf(file, "f%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
+ break;
+ default:
+ fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
+ break;
}
- if (inst->src[i].fixed_hw_reg.subnr)
- fprintf(file, "+%d", inst->src[i].fixed_hw_reg.subnr);
- if (inst->src[i].fixed_hw_reg.abs)
- fprintf(file, "|");
+ if (inst->src[i].subnr)
+ fprintf(file, "+%d", inst->src[i].subnr);
break;
case BAD_FILE:
fprintf(file, "(null)");
@@ -1528,8 +1478,8 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
/* Don't print .0; and only VGRFs have reg_offsets and sizes */
if (inst->src[i].reg_offset != 0 &&
- inst->src[i].file == GRF &&
- alloc.sizes[inst->src[i].reg] != 1)
+ inst->src[i].file == VGRF &&
+ alloc.sizes[inst->src[i].nr] != 1)
fprintf(file, ".%d", inst->src[i].reg_offset);
if (inst->src[i].file != IMM) {
@@ -1551,6 +1501,9 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
fprintf(file, ", ");
}
+ if (inst->force_writemask_all)
+ fprintf(file, " NoMask");
+
fprintf(file, "\n");
}
@@ -1584,7 +1537,7 @@ vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map,
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
/* We have to support ATTR as a destination for GL_FIXED fixup. */
if (inst->dst.file == ATTR) {
- int grf = attribute_map[inst->dst.reg + inst->dst.reg_offset];
+ int grf = attribute_map[inst->dst.nr + inst->dst.reg_offset];
/* All attributes used in the shader need to have been assigned a
* hardware register by the caller
@@ -1593,17 +1546,16 @@ vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map,
struct brw_reg reg = attribute_to_hw_reg(grf, interleaved);
reg.type = inst->dst.type;
- reg.dw1.bits.writemask = inst->dst.writemask;
+ reg.writemask = inst->dst.writemask;
- inst->dst.file = HW_REG;
- inst->dst.fixed_hw_reg = reg;
+ inst->dst = reg;
}
for (int i = 0; i < 3; i++) {
if (inst->src[i].file != ATTR)
continue;
- int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset];
+ int grf = attribute_map[inst->src[i].nr + inst->src[i].reg_offset];
/* All attributes used in the shader need to have been assigned a
* hardware register by the caller
@@ -1611,15 +1563,14 @@ vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map,
assert(grf != 0);
struct brw_reg reg = attribute_to_hw_reg(grf, interleaved);
- reg.dw1.bits.swizzle = inst->src[i].swizzle;
+ reg.swizzle = inst->src[i].swizzle;
reg.type = inst->src[i].type;
if (inst->src[i].abs)
reg = brw_abs(reg);
if (inst->src[i].negate)
reg = negate(reg);
- inst->src[i].file = HW_REG;
- inst->src[i].fixed_hw_reg = reg;
+ inst->src[i] = reg;
}
}
}
@@ -1803,26 +1754,26 @@ vec4_visitor::convert_to_hw_regs()
struct src_reg &src = inst->src[i];
struct brw_reg reg;
switch (src.file) {
- case GRF:
- reg = brw_vec8_grf(src.reg + src.reg_offset, 0);
+ case VGRF:
+ reg = brw_vec8_grf(src.nr + src.reg_offset, 0);
reg.type = src.type;
- reg.dw1.bits.swizzle = src.swizzle;
+ reg.swizzle = src.swizzle;
reg.abs = src.abs;
reg.negate = src.negate;
break;
case IMM:
reg = brw_imm_reg(src.type);
- reg.dw1.ud = src.fixed_hw_reg.dw1.ud;
+ reg.ud = src.ud;
break;
case UNIFORM:
reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg +
- (src.reg + src.reg_offset) / 2,
- ((src.reg + src.reg_offset) % 2) * 4),
+ (src.nr + src.reg_offset) / 2,
+ ((src.nr + src.reg_offset) % 2) * 4),
0, 4, 1);
reg.type = src.type;
- reg.dw1.bits.swizzle = src.swizzle;
+ reg.swizzle = src.swizzle;
reg.abs = src.abs;
reg.negate = src.negate;
@@ -1830,8 +1781,8 @@ vec4_visitor::convert_to_hw_regs()
assert(!src.reladdr);
break;
- case HW_REG:
- assert(src.type == src.fixed_hw_reg.type);
+ case ARF:
+ case FIXED_GRF:
continue;
case BAD_FILE:
@@ -1843,29 +1794,29 @@ vec4_visitor::convert_to_hw_regs()
case ATTR:
unreachable("not reached");
}
- src.fixed_hw_reg = reg;
+ src = reg;
}
dst_reg &dst = inst->dst;
struct brw_reg reg;
switch (inst->dst.file) {
- case GRF:
- reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
+ case VGRF:
+ reg = brw_vec8_grf(dst.nr + dst.reg_offset, 0);
reg.type = dst.type;
- reg.dw1.bits.writemask = dst.writemask;
+ reg.writemask = dst.writemask;
break;
case MRF:
- assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
- reg = brw_message_reg(dst.reg + dst.reg_offset);
+ assert(((dst.nr + dst.reg_offset) & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
+ reg = brw_message_reg(dst.nr + dst.reg_offset);
reg.type = dst.type;
- reg.dw1.bits.writemask = dst.writemask;
+ reg.writemask = dst.writemask;
break;
- case HW_REG:
- assert(dst.type == dst.fixed_hw_reg.type);
- reg = dst.fixed_hw_reg;
+ case ARF:
+ case FIXED_GRF:
+ reg = dst;
break;
case BAD_FILE:
@@ -1878,7 +1829,7 @@ vec4_visitor::convert_to_hw_regs()
unreachable("not reached");
}
- dst.fixed_hw_reg = reg;
+ dst = reg;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
index a90cadb77db..a76a4ce4639 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
@@ -144,7 +144,7 @@ namespace brw {
assert(dispatch_width() <= 32);
if (n > 0)
- return retype(dst_reg(GRF, shader->alloc.allocate(
+ return retype(dst_reg(VGRF, shader->alloc.allocate(
n * DIV_ROUND_UP(type_sz(type), 4))),
type);
else
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
index 329f24269ce..7aa8f5d9b8f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
@@ -48,7 +48,7 @@ opt_cmod_propagation_local(bblock_t *block)
inst->opcode != BRW_OPCODE_MOV) ||
inst->predicate != BRW_PREDICATE_NONE ||
!inst->dst.is_null() ||
- inst->src[0].file != GRF ||
+ inst->src[0].file != VGRF ||
inst->src[0].abs)
continue;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index db99ecba35a..3b76e36a803 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -47,7 +47,7 @@ is_direct_copy(vec4_instruction *inst)
{
return (inst->opcode == BRW_OPCODE_MOV &&
!inst->predicate &&
- inst->dst.file == GRF &&
+ inst->dst.file == VGRF &&
!inst->dst.reladdr &&
!inst->src[0].reladdr &&
(inst->dst.type == inst->src[0].type ||
@@ -70,8 +70,8 @@ is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch)
const src_reg *src = values[ch];
/* consider GRF only */
- assert(inst->dst.file == GRF);
- if (!src || src->file != GRF)
+ assert(inst->dst.file == VGRF);
+ if (!src || src->file != VGRF)
return false;
return (src->in_range(inst->dst, inst->regs_written) &&
@@ -134,21 +134,20 @@ try_constant_propagate(const struct brw_device_info *devinfo,
if (inst->src[arg].abs) {
if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
- !brw_abs_immediate(value.type, &value.fixed_hw_reg)) {
+ !brw_abs_immediate(value.type, &value)) {
return false;
}
}
if (inst->src[arg].negate) {
if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
- !brw_negate_immediate(value.type, &value.fixed_hw_reg)) {
+ !brw_negate_immediate(value.type, &value)) {
return false;
}
}
if (value.type == BRW_REGISTER_TYPE_VF)
- value.fixed_hw_reg.dw1.ud = swizzle_vf_imm(value.fixed_hw_reg.dw1.ud,
- inst->src[arg].swizzle);
+ value.ud = swizzle_vf_imm(value.ud, inst->src[arg].swizzle);
switch (inst->opcode) {
case BRW_OPCODE_MOV:
@@ -272,7 +271,7 @@ try_copy_propagate(const struct brw_device_info *devinfo,
for (int i = 1; i < 4; i++) {
/* This is equals() except we don't care about the swizzle. */
if (value.file != entry->value[i]->file ||
- value.reg != entry->value[i]->reg ||
+ value.nr != entry->value[i]->nr ||
value.reg_offset != entry->value[i]->reg_offset ||
value.type != entry->value[i]->type ||
value.negate != entry->value[i]->negate ||
@@ -293,7 +292,7 @@ try_copy_propagate(const struct brw_device_info *devinfo,
/* Check that we can propagate that value */
if (value.file != UNIFORM &&
- value.file != GRF &&
+ value.file != VGRF &&
value.file != ATTR)
return false;
@@ -359,8 +358,8 @@ try_copy_propagate(const struct brw_device_info *devinfo,
inst->src[0].type != BRW_REGISTER_TYPE_F ||
inst->src[1].file != IMM ||
inst->src[1].type != BRW_REGISTER_TYPE_F ||
- inst->src[1].fixed_hw_reg.dw1.f < 0.0 ||
- inst->src[1].fixed_hw_reg.dw1.f > 1.0) {
+ inst->src[1].f < 0.0 ||
+ inst->src[1].f > 1.0) {
return false;
}
if (!inst->saturate)
@@ -417,14 +416,14 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
}
/* For each source arg, see if each component comes from a copy
- * from the same type file (IMM, GRF, UNIFORM), and try
+ * from the same type file (IMM, VGRF, UNIFORM), and try
* optimizing out access to the copy result
*/
for (int i = 2; i >= 0; i--) {
/* Copied values end up in GRFs, and we don't track reladdr
* accesses.
*/
- if (inst->src[i].file != GRF ||
+ if (inst->src[i].file != VGRF ||
inst->src[i].reladdr)
continue;
@@ -432,7 +431,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
if (inst->regs_read(i) != 1)
continue;
- int reg = (alloc.offsets[inst->src[i].reg] +
+ int reg = (alloc.offsets[inst->src[i].nr] +
inst->src[i].reg_offset);
/* Find the regs that each swizzle component came from.
@@ -473,9 +472,9 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
}
/* Track available source registers. */
- if (inst->dst.file == GRF) {
+ if (inst->dst.file == VGRF) {
const int reg =
- alloc.offsets[inst->dst.reg] + inst->dst.reg_offset;
+ alloc.offsets[inst->dst.nr] + inst->dst.reg_offset;
/* Update our destination's current channel values. For a direct copy,
* the value is the newly propagated source. Otherwise, we don't know
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index 5a277f74c44..85cbf24092e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -143,7 +143,8 @@ vec4_visitor::opt_cse_local(bblock_t *block)
foreach_inst_in_block (vec4_instruction, inst, block) {
/* Skip some cases. */
if (is_expression(inst) && !inst->predicate && inst->mlen == 0 &&
- (inst->dst.file != HW_REG || inst->dst.is_null()))
+ ((inst->dst.file != ARF && inst->dst.file != FIXED_GRF) ||
+ inst->dst.is_null()))
{
bool found = false;
@@ -174,7 +175,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
*/
bool no_existing_temp = entry->tmp.file == BAD_FILE;
if (no_existing_temp && !entry->generator->dst.is_null()) {
- entry->tmp = retype(src_reg(GRF, alloc.allocate(
+ entry->tmp = retype(src_reg(VGRF, alloc.allocate(
entry->generator->regs_written),
NULL), inst->dst.type);
@@ -233,7 +234,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
* overwrote.
*/
if (inst->dst.file == entry->generator->src[i].file &&
- inst->dst.reg == entry->generator->src[i].reg) {
+ inst->dst.nr == entry->generator->src[i].nr) {
entry->remove();
ralloc_free(entry);
break;
@@ -242,7 +243,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
/* Kill any AEB entries using registers that don't get reused any
* more -- a sure sign they'll fail operands_match().
*/
- if (src->file == GRF) {
+ if (src->file == VGRF) {
if (var_range_end(var_from_reg(alloc, *src), 4) < ip) {
entry->remove();
ralloc_free(entry);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
index 284e0a8d0a5..58aed810fcd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
@@ -78,11 +78,11 @@ vec4_visitor::dead_code_eliminate()
sizeof(BITSET_WORD));
foreach_inst_in_block_reverse(vec4_instruction, inst, block) {
- if ((inst->dst.file == GRF && !inst->has_side_effects()) ||
+ if ((inst->dst.file == VGRF && !inst->has_side_effects()) ||
(inst->dst.is_null() && inst->writes_flag())){
bool result_live[4] = { false };
- if (inst->dst.file == GRF) {
+ if (inst->dst.file == VGRF) {
for (unsigned i = 0; i < inst->regs_written; i++) {
for (int c = 0; c < 4; c++)
result_live[c] |= BITSET_TEST(
@@ -134,7 +134,7 @@ vec4_visitor::dead_code_eliminate()
}
}
- if (inst->dst.file == GRF && !inst->predicate) {
+ if (inst->dst.file == VGRF && !inst->predicate) {
for (unsigned i = 0; i < inst->regs_written; i++) {
for (int c = 0; c < 4; c++) {
if (inst->dst.writemask & (1 << c)) {
@@ -145,13 +145,13 @@ vec4_visitor::dead_code_eliminate()
}
}
- if (inst->writes_flag()) {
+ if (inst->writes_flag() && !inst->predicate) {
for (unsigned c = 0; c < 4; c++)
BITSET_CLEAR(flag_live, c);
}
for (int i = 0; i < 3; i++) {
- if (inst->src[i].file == GRF) {
+ if (inst->src[i].file == VGRF) {
for (unsigned j = 0; j < inst->regs_read(i); j++) {
for (int c = 0; c < 4; c++) {
BITSET_SET(live, var_from_reg(alloc,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 8bc21df5ffc..20107ac2054 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -46,7 +46,7 @@ check_gen6_math_src_arg(struct brw_reg src)
/* Source swizzles are ignored. */
assert(!src.abs);
assert(!src.negate);
- assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
+ assert(src.swizzle == BRW_SWIZZLE_XYZW);
}
static void
@@ -57,7 +57,7 @@ generate_math_gen6(struct brw_codegen *p,
struct brw_reg src1)
{
/* Can't do writemask because math can't be align16. */
- assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+ assert(dst.writemask == WRITEMASK_XYZW);
/* Source swizzles are ignored. */
check_gen6_math_src_arg(src0);
if (src1.file == BRW_GENERAL_REGISTER_FILE)
@@ -135,6 +135,10 @@ generate_tex(struct brw_codegen *p,
case SHADER_OPCODE_TXF:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
+ case SHADER_OPCODE_TXF_CMS_W:
+ assert(devinfo->gen >= 9);
+ msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
+ break;
case SHADER_OPCODE_TXF_CMS:
if (devinfo->gen >= 7)
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
@@ -260,7 +264,7 @@ generate_tex(struct brw_codegen *p,
: prog_data->base.binding_table.texture_start;
if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
- uint32_t sampler = sampler_index.dw1.ud;
+ uint32_t sampler = sampler_index.ud;
brw_SAMPLE(p,
dst,
@@ -352,7 +356,7 @@ generate_gs_urb_write_allocate(struct brw_codegen *p, vec4_instruction *inst)
/* We pass the temporary passed in src0 as the writeback register */
brw_urb_WRITE(p,
- inst->src[0].fixed_hw_reg, /* dest */
+ inst->src[0], /* dest */
inst->base_mrf, /* starting mrf reg nr */
src,
BRW_URB_WRITE_ALLOCATE_COMPLETE,
@@ -365,8 +369,8 @@ generate_gs_urb_write_allocate(struct brw_codegen *p, vec4_instruction *inst)
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, get_element_ud(inst->dst.fixed_hw_reg, 0),
- get_element_ud(inst->src[0].fixed_hw_reg, 0));
+ brw_MOV(p, get_element_ud(inst->dst, 0),
+ get_element_ud(inst->src[0], 0));
brw_pop_insn_state(p);
}
@@ -415,10 +419,10 @@ generate_gs_set_write_offset(struct brw_codegen *p,
assert(p->devinfo->gen >= 7 &&
src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_REGISTER_TYPE_UD &&
- src1.dw1.ud <= USHRT_MAX);
+ src1.ud <= USHRT_MAX);
if (src0.file == BRW_IMMEDIATE_VALUE) {
brw_MOV(p, suboffset(stride(dst, 2, 2, 1), 3),
- brw_imm_ud(src0.dw1.ud * src1.dw1.ud));
+ brw_imm_ud(src0.ud * src1.ud));
} else {
brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
retype(src1, BRW_REGISTER_TYPE_UW));
@@ -736,7 +740,7 @@ generate_oword_dual_block_offsets(struct brw_codegen *p,
brw_MOV(p, m1_0, index_0);
if (index.file == BRW_IMMEDIATE_VALUE) {
- index_4.dw1.ud += second_vertex_offset;
+ index_4.ud += second_vertex_offset;
brw_MOV(p, m1_4, index_4);
} else {
brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
@@ -891,7 +895,7 @@ generate_pull_constant_load(struct brw_codegen *p,
const struct brw_device_info *devinfo = p->devinfo;
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
- uint32_t surf_index = index.dw1.ud;
+ uint32_t surf_index = index.ud;
struct brw_reg header = brw_vec8_grf(0, 0);
@@ -925,8 +929,6 @@ generate_pull_constant_load(struct brw_codegen *p,
2, /* mlen */
true, /* header_present */
1 /* rlen */);
-
- brw_mark_surface_used(&prog_data->base, surf_index);
}
static void
@@ -945,7 +947,7 @@ generate_get_buffer_size(struct brw_codegen *p,
dst,
inst->base_mrf,
src,
- surf_index.dw1.ud,
+ surf_index.ud,
0,
GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
1, /* response length */
@@ -954,7 +956,7 @@ generate_get_buffer_size(struct brw_codegen *p,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
BRW_SAMPLER_RETURN_FORMAT_SINT32);
- brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
+ brw_mark_surface_used(&prog_data->base, surf_index.ud);
}
static void
@@ -973,7 +975,7 @@ generate_pull_constant_load_gen7(struct brw_codegen *p,
brw_set_dest(p, insn, dst);
brw_set_src0(p, insn, offset);
brw_set_sampler_message(p, insn,
- surf_index.dw1.ud,
+ surf_index.ud,
0, /* LD message ignores sampler unit */
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1, /* rlen */
@@ -982,7 +984,7 @@ generate_pull_constant_load_gen7(struct brw_codegen *p,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
- brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
+ brw_mark_surface_used(&prog_data->base, surf_index.ud);
} else {
@@ -1013,10 +1015,6 @@ generate_pull_constant_load_gen7(struct brw_codegen *p,
inst->header_size != 0,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
-
- /* visitor knows more than we do about the surface limit required,
- * so has already done marking.
- */
}
}
@@ -1061,9 +1059,9 @@ generate_code(struct brw_codegen *p,
annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
for (unsigned int i = 0; i < 3; i++) {
- src[i] = inst->src[i].fixed_hw_reg;
+ src[i] = inst->src[i];
}
- dst = inst->dst.fixed_hw_reg;
+ dst = inst->dst;
brw_set_default_predicate_control(p, inst->predicate);
brw_set_default_predicate_inverse(p, inst->predicate_inverse);
@@ -1243,7 +1241,7 @@ generate_code(struct brw_codegen *p,
break;
case BRW_OPCODE_IF:
- if (inst->src[0].file != BAD_FILE) {
+ if (!inst->src[0].is_null()) {
/* The instruction has an embedded compare (only allowed on gen6) */
assert(devinfo->gen == 6);
gen6_IF(p, inst->conditional_mod, src[0], src[1]);
@@ -1313,6 +1311,7 @@ generate_code(struct brw_codegen *p,
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_CMS:
+ case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXS:
@@ -1416,38 +1415,38 @@ generate_code(struct brw_codegen *p,
case SHADER_OPCODE_UNTYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_untyped_atomic(p, dst, src[0], src[1], src[2].dw1.ud, inst->mlen,
+ brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
!inst->dst.is_null());
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen,
- src[2].dw1.ud);
+ src[2].ud);
break;
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_write(p, src[0], src[1], inst->mlen,
- src[2].dw1.ud);
+ src[2].ud);
break;
case SHADER_OPCODE_TYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_typed_atomic(p, dst, src[0], src[1], src[2].dw1.ud, inst->mlen,
+ brw_typed_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
!inst->dst.is_null());
break;
case SHADER_OPCODE_TYPED_SURFACE_READ:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_read(p, dst, src[0], src[1], inst->mlen,
- src[2].dw1.ud);
+ src[2].ud);
break;
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_write(p, src[0], src[1], inst->mlen,
- src[2].dw1.ud);
+ src[2].ud);
break;
case SHADER_OPCODE_MEMORY_FENCE:
@@ -1495,9 +1494,9 @@ generate_code(struct brw_codegen *p,
*
* where they pack the four bytes from the low and high four DW.
*/
- assert(_mesa_is_pow_two(dst.dw1.bits.writemask) &&
- dst.dw1.bits.writemask != 0);
- unsigned offset = __builtin_ctz(dst.dw1.bits.writemask);
+ assert(_mesa_is_pow_two(dst.writemask) &&
+ dst.writemask != 0);
+ unsigned offset = __builtin_ctz(dst.writemask);
dst.type = BRW_REGISTER_TYPE_UB;
@@ -1549,6 +1548,13 @@ generate_code(struct brw_codegen *p,
brw_set_uip_jip(p);
annotation_finalize(&annotation, p->next_insn_offset);
+#ifndef NDEBUG
+ bool validated = brw_validate_instructions(p, 0, &annotation);
+#else
+ if (unlikely(debug_flag))
+ brw_validate_instructions(p, 0, &annotation);
+#endif
+
int before_size = p->next_insn_offset;
brw_compact_instructions(p, 0, annotation.ann_count, annotation.ann);
int after_size = p->next_insn_offset;
@@ -1566,8 +1572,9 @@ generate_code(struct brw_codegen *p,
dump_assembly(p->store, annotation.ann_count, annotation.ann,
p->devinfo);
- ralloc_free(annotation.ann);
+ ralloc_free(annotation.mem_ctx);
}
+ assert(validated);
compiler->shader_debug_log(log_data,
"%s vec4 shader: %d inst, %d loops, %u cycles, "
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index cfb5cd95cb1..1a09f76a20c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -29,6 +29,7 @@
#include "brw_vec4_gs_visitor.h"
#include "gen6_gs_visitor.h"
+#include "brw_fs.h"
namespace brw {
@@ -811,6 +812,36 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
/* Now that prog_data setup is done, we are ready to actually compile the
* program.
*/
+ if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
+ fprintf(stderr, "GS Input ");
+ brw_print_vue_map(stderr, &c.input_vue_map);
+ fprintf(stderr, "GS Output ");
+ brw_print_vue_map(stderr, &prog_data->base.vue_map);
+ }
+
+ if (compiler->scalar_gs) {
+ /* TODO: Support instanced GS. We have basically no tests... */
+ assert(prog_data->invocations == 1);
+
+ fs_visitor v(compiler, log_data, mem_ctx, &c, prog_data, shader,
+ shader_time_index);
+ if (v.run_gs()) {
+ prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
+
+ fs_generator g(compiler, log_data, mem_ctx, &c.key,
+ &prog_data->base.base, v.promoted_constants,
+ false, "GS");
+ if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
+ const char *label =
+ shader->info.label ? shader->info.label : "unnamed";
+ char *name = ralloc_asprintf(mem_ctx, "%s geometry shader %s",
+ label, shader->info.name);
+ g.enable_debug(name);
+ }
+ g.generate_code(v.cfg, 8);
+ return g.get_assembly(final_assembly_size);
+ }
+ }
if (compiler->devinfo->gen >= 7) {
/* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
index aa9a6572eee..57d5fbb75dd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
@@ -75,7 +75,7 @@ vec4_live_variables::setup_def_use()
/* Set use[] for this instruction */
for (unsigned int i = 0; i < 3; i++) {
- if (inst->src[i].file == GRF) {
+ if (inst->src[i].file == VGRF) {
for (unsigned j = 0; j < inst->regs_read(i); j++) {
for (int c = 0; c < 4; c++) {
const unsigned v =
@@ -97,7 +97,7 @@ vec4_live_variables::setup_def_use()
* are the things that screen off preceding definitions of a
* variable, and thus qualify for being in def[].
*/
- if (inst->dst.file == GRF &&
+ if (inst->dst.file == VGRF &&
(!inst->predicate || inst->opcode == BRW_OPCODE_SEL)) {
for (unsigned i = 0; i < inst->regs_written; i++) {
for (int c = 0; c < 4; c++) {
@@ -256,7 +256,7 @@ vec4_visitor::calculate_live_intervals()
int ip = 0;
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
for (unsigned int i = 0; i < 3; i++) {
- if (inst->src[i].file == GRF) {
+ if (inst->src[i].file == VGRF) {
for (unsigned j = 0; j < inst->regs_read(i); j++) {
for (int c = 0; c < 4; c++) {
const unsigned v =
@@ -268,7 +268,7 @@ vec4_visitor::calculate_live_intervals()
}
}
- if (inst->dst.file == GRF) {
+ if (inst->dst.file == VGRF) {
for (unsigned i = 0; i < inst->regs_written; i++) {
for (int c = 0; c < 4; c++) {
if (inst->dst.writemask & (1 << c)) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
index e7929ec2189..12d281eb245 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
@@ -82,9 +82,9 @@ inline unsigned
var_from_reg(const simple_allocator &alloc, const src_reg &reg,
unsigned c = 0)
{
- assert(reg.file == GRF && reg.reg < alloc.count &&
- reg.reg_offset < alloc.sizes[reg.reg] && c < 4);
- return (4 * (alloc.offsets[reg.reg] + reg.reg_offset) +
+ assert(reg.file == VGRF && reg.nr < alloc.count &&
+ reg.reg_offset < alloc.sizes[reg.nr] && c < 4);
+ return (4 * (alloc.offsets[reg.nr] + reg.reg_offset) +
BRW_GET_SWZ(reg.swizzle, c));
}
@@ -92,9 +92,9 @@ inline unsigned
var_from_reg(const simple_allocator &alloc, const dst_reg &reg,
unsigned c = 0)
{
- assert(reg.file == GRF && reg.reg < alloc.count &&
- reg.reg_offset < alloc.sizes[reg.reg] && c < 4);
- return 4 * (alloc.offsets[reg.reg] + reg.reg_offset) + c;
+ assert(reg.file == VGRF && reg.nr < alloc.count &&
+ reg.reg_offset < alloc.sizes[reg.nr] && c < 4);
+ return 4 * (alloc.offsets[reg.nr] + reg.reg_offset) + c;
}
} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 1fb1773f856..258dd4f6548 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -106,6 +106,9 @@ void
vec4_visitor::nir_setup_system_values()
{
nir_system_values = ralloc_array(mem_ctx, dst_reg, SYSTEM_VALUE_MAX);
+ for (unsigned i = 0; i < SYSTEM_VALUE_MAX; i++) {
+ nir_system_values[i] = dst_reg();
+ }
nir_foreach_overload(nir, overload) {
assert(strcmp(overload->function->name, "main") == 0);
@@ -118,6 +121,9 @@ void
vec4_visitor::nir_setup_inputs()
{
nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs);
+ for (unsigned i = 0; i < nir->num_inputs; i++) {
+ nir_inputs[i] = dst_reg();
+ }
nir_foreach_variable(var, &nir->inputs) {
int offset = var->data.driver_location;
@@ -148,12 +154,15 @@ void
vec4_visitor::nir_emit_impl(nir_function_impl *impl)
{
nir_locals = ralloc_array(mem_ctx, dst_reg, impl->reg_alloc);
+ for (unsigned i = 0; i < impl->reg_alloc; i++) {
+ nir_locals[i] = dst_reg();
+ }
foreach_list_typed(nir_register, reg, node, &impl->registers) {
unsigned array_elems =
reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
- nir_locals[reg->index] = dst_reg(GRF, alloc.allocate(array_elems));
+ nir_locals[reg->index] = dst_reg(VGRF, alloc.allocate(array_elems));
}
nir_ssa_values = ralloc_array(mem_ctx, dst_reg, impl->ssa_alloc);
@@ -282,7 +291,7 @@ dst_reg
vec4_visitor::get_nir_dest(nir_dest dest)
{
if (dest.is_ssa) {
- dst_reg dst = dst_reg(GRF, alloc.allocate(1));
+ dst_reg dst = dst_reg(VGRF, alloc.allocate(1));
nir_ssa_values[dest.ssa.index] = dst;
return dst;
} else {
@@ -342,7 +351,7 @@ vec4_visitor::get_nir_src(nir_src src, unsigned num_components)
void
vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
{
- dst_reg reg = dst_reg(GRF, alloc.allocate(1));
+ dst_reg reg = dst_reg(VGRF, alloc.allocate(1));
reg.type = BRW_REGISTER_TYPE_D;
unsigned remaining = brw_writemask_for_size(instr->def.num_components);
@@ -427,15 +436,15 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
- src_reg surf_index = src_reg(prog_data->base.binding_table.ssbo_start +
- ssbo_index);
+ const unsigned index =
+ prog_data->base.binding_table.ssbo_start + ssbo_index;
dst_reg result_dst = get_nir_dest(instr->dest);
vec4_instruction *inst = new(mem_ctx)
vec4_instruction(VS_OPCODE_GET_BUFFER_SIZE, result_dst);
inst->base_mrf = 2;
inst->mlen = 1; /* always at least one */
- inst->src[1] = src_reg(surf_index);
+ inst->src[1] = src_reg(index);
/* MRF for the first parameter */
src_reg lod = src_reg(0);
@@ -444,6 +453,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
emit(inst);
+
+ brw_mark_surface_used(&prog_data->base, index);
break;
}
@@ -749,8 +760,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
/* The block index is a constant, so just emit the binding table entry
* as an immediate.
*/
- surf_index = src_reg(prog_data->base.binding_table.ubo_start +
- const_block_index->u[0]);
+ const unsigned index = prog_data->base.binding_table.ubo_start +
+ const_block_index->u[0];
+ surf_index = src_reg(index);
+ brw_mark_surface_used(&prog_data->base, index);
} else {
/* The block index is not a constant. Evaluate the index expression
* per-channel and add the base UBO index; we have to select a value
@@ -1407,7 +1420,23 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_bcsel:
emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
inst = emit(BRW_OPCODE_SEL, dst, op[1], op[2]);
- inst->predicate = BRW_PREDICATE_NORMAL;
+ switch (dst.writemask) {
+ case WRITEMASK_X:
+ inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_X;
+ break;
+ case WRITEMASK_Y:
+ inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_Y;
+ break;
+ case WRITEMASK_Z:
+ inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_Z;
+ break;
+ case WRITEMASK_W:
+ inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_W;
+ break;
+ default:
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ break;
+ }
break;
case nir_op_fdot_replicated2:
@@ -1708,7 +1737,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
void
vec4_visitor::nir_emit_undef(nir_ssa_undef_instr *instr)
{
- nir_ssa_values[instr->def.index] = dst_reg(GRF, alloc.allocate(1));
+ nir_ssa_values[instr->def.index] = dst_reg(VGRF, alloc.allocate(1));
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index a49eca56118..6d27a4694d3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -34,8 +34,8 @@ namespace brw {
static void
assign(unsigned int *reg_hw_locations, backend_reg *reg)
{
- if (reg->file == GRF) {
- reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset;
+ if (reg->file == VGRF) {
+ reg->nr = reg_hw_locations[reg->nr] + reg->reg_offset;
reg->reg_offset = 0;
}
}
@@ -55,12 +55,12 @@ vec4_visitor::reg_allocate_trivial()
}
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
- if (inst->dst.file == GRF)
- virtual_grf_used[inst->dst.reg] = true;
+ if (inst->dst.file == VGRF)
+ virtual_grf_used[inst->dst.nr] = true;
for (unsigned i = 0; i < 3; i++) {
- if (inst->src[i].file == GRF)
- virtual_grf_used[inst->src[i].reg] = true;
+ if (inst->src[i].file == VGRF)
+ virtual_grf_used[inst->src[i].nr] = true;
}
}
@@ -292,12 +292,12 @@ static bool
can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
unsigned scratch_reg)
{
- assert(inst->src[i].file == GRF);
+ assert(inst->src[i].file == VGRF);
bool prev_inst_read_scratch_reg = false;
/* See if any previous source in the same instructions reads scratch_reg */
for (unsigned n = 0; n < i; n++) {
- if (inst->src[n].file == GRF && inst->src[n].reg == scratch_reg)
+ if (inst->src[n].file == VGRF && inst->src[n].nr == scratch_reg)
prev_inst_read_scratch_reg = true;
}
@@ -310,7 +310,7 @@ can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
* it if the write is not conditional and the channels we write are
* compatible with our read mask
*/
- if (prev_inst->dst.file == GRF && prev_inst->dst.reg == scratch_reg) {
+ if (prev_inst->dst.file == VGRF && prev_inst->dst.nr == scratch_reg) {
return (!prev_inst->predicate || prev_inst->opcode == BRW_OPCODE_SEL) &&
(brw_mask_for_swizzle(inst->src[i].swizzle) &
~prev_inst->dst.writemask) == 0;
@@ -329,8 +329,8 @@ can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
*/
int n;
for (n = 0; n < 3; n++) {
- if (prev_inst->src[n].file == GRF &&
- prev_inst->src[n].reg == scratch_reg) {
+ if (prev_inst->src[n].file == VGRF &&
+ prev_inst->src[n].nr == scratch_reg) {
prev_inst_read_scratch_reg = true;
break;
}
@@ -374,23 +374,23 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
*/
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
for (unsigned int i = 0; i < 3; i++) {
- if (inst->src[i].file == GRF) {
+ if (inst->src[i].file == VGRF) {
/* We will only unspill src[i] it it wasn't unspilled for the
* previous instruction, in which case we'll just reuse the scratch
* reg for this instruction.
*/
- if (!can_use_scratch_for_source(inst, i, inst->src[i].reg)) {
- spill_costs[inst->src[i].reg] += loop_scale;
+ if (!can_use_scratch_for_source(inst, i, inst->src[i].nr)) {
+ spill_costs[inst->src[i].nr] += loop_scale;
if (inst->src[i].reladdr)
- no_spill[inst->src[i].reg] = true;
+ no_spill[inst->src[i].nr] = true;
}
}
}
- if (inst->dst.file == GRF) {
- spill_costs[inst->dst.reg] += loop_scale;
+ if (inst->dst.file == VGRF) {
+ spill_costs[inst->dst.nr] += loop_scale;
if (inst->dst.reladdr)
- no_spill[inst->dst.reg] = true;
+ no_spill[inst->dst.nr] = true;
}
switch (inst->opcode) {
@@ -406,11 +406,11 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
case SHADER_OPCODE_GEN4_SCRATCH_READ:
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
for (int i = 0; i < 3; i++) {
- if (inst->src[i].file == GRF)
- no_spill[inst->src[i].reg] = true;
+ if (inst->src[i].file == VGRF)
+ no_spill[inst->src[i].nr] = true;
}
- if (inst->dst.file == GRF)
- no_spill[inst->dst.reg] = true;
+ if (inst->dst.file == VGRF)
+ no_spill[inst->dst.nr] = true;
break;
default:
@@ -445,7 +445,7 @@ vec4_visitor::spill_reg(int spill_reg_nr)
int scratch_reg = -1;
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
for (unsigned int i = 0; i < 3; i++) {
- if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) {
+ if (inst->src[i].file == VGRF && inst->src[i].nr == spill_reg_nr) {
if (scratch_reg == -1 ||
!can_use_scratch_for_source(inst, i, scratch_reg)) {
/* We need to unspill anyway so make sure we read the full vec4
@@ -455,19 +455,19 @@ vec4_visitor::spill_reg(int spill_reg_nr)
*/
scratch_reg = alloc.allocate(1);
src_reg temp = inst->src[i];
- temp.reg = scratch_reg;
+ temp.nr = scratch_reg;
temp.swizzle = BRW_SWIZZLE_XYZW;
emit_scratch_read(block, inst,
dst_reg(temp), inst->src[i], spill_offset);
}
assert(scratch_reg != -1);
- inst->src[i].reg = scratch_reg;
+ inst->src[i].nr = scratch_reg;
}
}
- if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) {
+ if (inst->dst.file == VGRF && inst->dst.nr == spill_reg_nr) {
emit_scratch_write(block, inst, spill_offset);
- scratch_reg = inst->dst.reg;
+ scratch_reg = inst->dst.nr;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 92b089d7ff6..70a1ea4f9d4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -237,8 +237,6 @@ vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1,
* type to match src0 so we can compact the instruction.
*/
dst.type = src0.type;
- if (dst.file == HW_REG)
- dst.fixed_hw_reg.type = dst.type;
resolve_ud_negate(&src0);
resolve_ud_negate(&src1);
@@ -635,8 +633,8 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
{
init();
- this->file = GRF;
- this->reg = v->alloc.allocate(type_size_vec4(type));
+ this->file = VGRF;
+ this->nr = v->alloc.allocate(type_size_vec4(type));
if (type->is_array() || type->is_record()) {
this->swizzle = BRW_SWIZZLE_NOOP;
@@ -653,8 +651,8 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
init();
- this->file = GRF;
- this->reg = v->alloc.allocate(type_size_vec4(type) * size);
+ this->file = VGRF;
+ this->nr = v->alloc.allocate(type_size_vec4(type) * size);
this->swizzle = BRW_SWIZZLE_NOOP;
@@ -665,8 +663,8 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
{
init();
- this->file = GRF;
- this->reg = v->alloc.allocate(type_size_vec4(type));
+ this->file = VGRF;
+ this->nr = v->alloc.allocate(type_size_vec4(type));
if (type->is_array() || type->is_record()) {
this->writemask = WRITEMASK_XYZW;
@@ -864,7 +862,7 @@ vec4_visitor::is_high_sampler(src_reg sampler)
if (devinfo->gen < 8 && !devinfo->is_haswell)
return false;
- return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
+ return sampler.file != IMM || sampler.ud >= 16;
}
void
@@ -901,7 +899,8 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
case ir_txl: opcode = SHADER_OPCODE_TXL; break;
case ir_txd: opcode = SHADER_OPCODE_TXD; break;
case ir_txf: opcode = SHADER_OPCODE_TXF; break;
- case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
+ case ir_txf_ms: opcode = (devinfo->gen >= 9 ? SHADER_OPCODE_TXF_CMS_W :
+ SHADER_OPCODE_TXF_CMS); break;
case ir_txs: opcode = SHADER_OPCODE_TXS; break;
case ir_tg4: opcode = offset_value.file != BAD_FILE
? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
@@ -993,7 +992,16 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
} else if (op == ir_txf_ms) {
emit(MOV(dst_reg(MRF, param_base + 1, sample_index.type, WRITEMASK_X),
sample_index));
- if (devinfo->gen >= 7) {
+ if (opcode == SHADER_OPCODE_TXF_CMS_W) {
+ /* MCS data is stored in the first two channels of ‘mcs’, but we
+ * need to get it into the .y and .z channels of the second vec4
+ * of params.
+ */
+ mcs.swizzle = BRW_SWIZZLE4(0, 0, 1, 1);
+ emit(MOV(dst_reg(MRF, param_base + 1,
+ glsl_type::uint_type, WRITEMASK_YZ),
+ mcs));
+ } else if (devinfo->gen >= 7) {
/* MCS data is in the first channel of `mcs`, but we need to get it into
* the .y channel of the second vec4 of params, so replicate .x across
* the whole vec4 and then mask off everything except .y
@@ -1184,24 +1192,27 @@ vec4_visitor::gs_end_primitive()
void
vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
- dst_reg dst, src_reg offset,
+ dst_reg dst, src_reg surf_offset,
src_reg src0, src_reg src1)
{
- unsigned mlen = 0;
+ unsigned mlen = 1 + (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+ src_reg src_payload(this, glsl_type::uint_type, mlen);
+ dst_reg payload(src_payload);
+ payload.writemask = WRITEMASK_X;
/* Set the atomic operation offset. */
- emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
- mlen++;
+ emit(MOV(offset(payload, 0), surf_offset));
+ unsigned i = 1;
/* Set the atomic operation arguments. */
if (src0.file != BAD_FILE) {
- emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0));
- mlen++;
+ emit(MOV(offset(payload, i), src0));
+ i++;
}
if (src1.file != BAD_FILE) {
- emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1));
- mlen++;
+ emit(MOV(offset(payload, i), src1));
+ i++;
}
/* Emit the instruction. Note that this maps to the normal SIMD8
@@ -1209,24 +1220,27 @@ vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
* unused channels will be masked out.
*/
vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
- brw_message_reg(0),
+ src_payload,
src_reg(surf_index), src_reg(atomic_op));
inst->mlen = mlen;
}
void
vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
- src_reg offset)
+ src_reg surf_offset)
{
+ dst_reg offset(this, glsl_type::uint_type);
+ offset.writemask = WRITEMASK_X;
+
/* Set the surface read offset. */
- emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset));
+ emit(MOV(offset, surf_offset));
/* Emit the instruction. Note that this maps to the normal SIMD8
* untyped surface read message, but that's OK because unused
* channels will be masked out.
*/
vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
- brw_message_reg(0),
+ src_reg(offset),
src_reg(surf_index), src_reg(1));
inst->mlen = 1;
}
@@ -1602,7 +1616,7 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
inst->insert_after(block, write);
inst->dst.file = temp.file;
- inst->dst.reg = temp.reg;
+ inst->dst.nr = temp.nr;
inst->dst.reg_offset = temp.reg_offset;
inst->dst.reladdr = NULL;
}
@@ -1629,10 +1643,10 @@ vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
*src.reladdr);
/* Now handle scratch access on src */
- if (src.file == GRF && scratch_loc[src.reg] != -1) {
+ if (src.file == VGRF && scratch_loc[src.nr] != -1) {
dst_reg temp = dst_reg(this, glsl_type::vec4_type);
- emit_scratch_read(block, inst, temp, src, scratch_loc[src.reg]);
- src.reg = temp.reg;
+ emit_scratch_read(block, inst, temp, src, scratch_loc[src.nr]);
+ src.nr = temp.nr;
src.reg_offset = temp.reg_offset;
src.reladdr = NULL;
}
@@ -1657,18 +1671,18 @@ vec4_visitor::move_grf_array_access_to_scratch()
* scratch.
*/
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
- if (inst->dst.file == GRF && inst->dst.reladdr) {
- if (scratch_loc[inst->dst.reg] == -1) {
- scratch_loc[inst->dst.reg] = last_scratch;
- last_scratch += this->alloc.sizes[inst->dst.reg];
+ if (inst->dst.file == VGRF && inst->dst.reladdr) {
+ if (scratch_loc[inst->dst.nr] == -1) {
+ scratch_loc[inst->dst.nr] = last_scratch;
+ last_scratch += this->alloc.sizes[inst->dst.nr];
}
for (src_reg *iter = inst->dst.reladdr;
iter->reladdr;
iter = iter->reladdr) {
- if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
- scratch_loc[iter->reg] = last_scratch;
- last_scratch += this->alloc.sizes[iter->reg];
+ if (iter->file == VGRF && scratch_loc[iter->nr] == -1) {
+ scratch_loc[iter->nr] = last_scratch;
+ last_scratch += this->alloc.sizes[iter->nr];
}
}
}
@@ -1677,9 +1691,9 @@ vec4_visitor::move_grf_array_access_to_scratch()
for (src_reg *iter = &inst->src[i];
iter->reladdr;
iter = iter->reladdr) {
- if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
- scratch_loc[iter->reg] = last_scratch;
- last_scratch += this->alloc.sizes[iter->reg];
+ if (iter->file == VGRF && scratch_loc[iter->nr] == -1) {
+ scratch_loc[iter->nr] = last_scratch;
+ last_scratch += this->alloc.sizes[iter->nr];
}
}
}
@@ -1705,8 +1719,8 @@ vec4_visitor::move_grf_array_access_to_scratch()
/* Now that we have handled any (possibly recursive) reladdr scratch
* accesses for dst we can safely do the scratch write for dst itself
*/
- if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1)
- emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]);
+ if (inst->dst.file == VGRF && scratch_loc[inst->dst.nr] != -1)
+ emit_scratch_write(block, inst, scratch_loc[inst->dst.nr]);
/* Now handle scratch access on any src. In this case, since inst->src[i]
* already is a src_reg, we can just call emit_resolve_reladdr with
@@ -1730,14 +1744,16 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
int base_offset)
{
int reg_offset = base_offset + orig_src.reg_offset;
- src_reg index = src_reg(prog_data->base.binding_table.pull_constants_start);
+ const unsigned index = prog_data->base.binding_table.pull_constants_start;
src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
reg_offset);
emit_pull_constant_load_reg(temp,
- index,
+ src_reg(index),
offset,
block, inst);
+
+ brw_mark_surface_used(&prog_data->base, index);
}
/**
@@ -1773,7 +1789,7 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
continue;
- int uniform = inst->src[i].reg;
+ int uniform = inst->src[i].nr;
if (inst->src[i].reladdr->reladdr)
nested_reladdr = true; /* will need another pass */
@@ -1804,7 +1820,7 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
pull_constant_loc[uniform]);
inst->src[i].file = temp.file;
- inst->src[i].reg = temp.reg;
+ inst->src[i].nr = temp.nr;
inst->src[i].reg_offset = temp.reg_offset;
inst->src[i].reladdr = NULL;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 0b805b1c0c4..967448e0e41 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -159,9 +159,13 @@ brw_codegen_vs_prog(struct brw_context *brw,
start_time = get_time();
}
- if (unlikely(INTEL_DEBUG & DEBUG_VS))
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
brw_dump_ir("vertex", prog, vs ? &vs->base : NULL, &vp->program.Base);
+ fprintf(stderr, "VS Output ");
+ brw_print_vue_map(stderr, &prog_data.base.vue_map);
+ }
+
int st_index = -1;
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
st_index = brw_get_shader_time_index(brw, prog, &vp->program.Base, ST_VS);
diff --git a/src/mesa/drivers/dri/i965/brw_vue_map.c b/src/mesa/drivers/dri/i965/brw_vue_map.c
index 45662bd5afc..edb16087410 100644
--- a/src/mesa/drivers/dri/i965/brw_vue_map.c
+++ b/src/mesa/drivers/dri/i965/brw_vue_map.c
@@ -178,3 +178,51 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
vue_map->num_slots = separate ? slot + 1 : slot;
}
+
+/**
+ * Return a printable name for a VUE slot.
+ *
+ * Slots below VARYING_SLOT_MAX are named by gl_varying_slot_name(); the
+ * remaining slots are looked up in a local table of BRW_VARYING_SLOT_*
+ * names.  An index the table does not list yields a placeholder string
+ * rather than NULL or an out-of-bounds read.
+ */
+static const char *
+varying_name(brw_varying_slot slot)
+{
+   if (slot < VARYING_SLOT_MAX)
+      return gl_varying_slot_name(slot);
+
+   static const char *brw_names[] = {
+      [BRW_VARYING_SLOT_NDC - VARYING_SLOT_MAX] = "BRW_VARYING_SLOT_NDC",
+      [BRW_VARYING_SLOT_PAD - VARYING_SLOT_MAX] = "BRW_VARYING_SLOT_PAD",
+      [BRW_VARYING_SLOT_PNTC - VARYING_SLOT_MAX] = "BRW_VARYING_SLOT_PNTC",
+   };
+   const unsigned idx = slot - VARYING_SLOT_MAX;
+
+   /* Designated initializers leave unlisted entries NULL, and the caller
+    * passes whatever slot_to_varying[] holds, so validate before indexing.
+    */
+   if (idx >= sizeof(brw_names) / sizeof(brw_names[0]) ||
+       brw_names[idx] == NULL)
+      return "BRW_VARYING_SLOT_<unknown>";
+
+   return brw_names[idx];
+}
+
+/**
+ * Print a VUE map to fp: a summary line with the slot count and the
+ * "SSO"/"non-SSO" flag taken from vue_map->separate, then one line per slot
+ * giving its index and the name of the varying stored there.
+ */
+void
+brw_print_vue_map(FILE *fp, const struct brw_vue_map *vue_map)
+{
+   fprintf(fp, "VUE map (%d slots, %s)\n",
+           vue_map->num_slots, vue_map->separate ? "SSO" : "non-SSO");
+   for (int i = 0; i < vue_map->num_slots; i++) {
+      fprintf(fp, " [%d] %s\n", i,
+              varying_name(vue_map->slot_to_varying[i]));
+   }
+   fprintf(fp, "\n");
+}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 5c49db9e63e..8d9ed3a6c33 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -212,6 +212,9 @@ brw_debug_recompile_sampler_key(struct brw_context *brw,
found |= key_debug(brw, "compressed multisample layout",
old_key->compressed_multisample_layout_mask,
key->compressed_multisample_layout_mask);
+ found |= key_debug(brw, "16x msaa",
+ old_key->msaa_16,
+ key->msaa_16);
for (unsigned int i = 0; i < MAX_SAMPLERS; i++) {
found |= key_debug(brw, "textureGather workarounds",
@@ -371,6 +374,11 @@ brw_populate_sampler_prog_key_data(struct gl_context *ctx,
if (brw->gen >= 7 &&
intel_tex->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
key->compressed_multisample_layout_mask |= 1 << s;
+
+ if (intel_tex->mt->num_samples >= 16) {
+ assert(brw->gen >= 9);
+ key->msaa_16 |= 1 << s;
+ }
}
}
}
diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
index 8444c0c9bae..8eb620de56b 100644
--- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
@@ -48,6 +48,9 @@ gen6_get_sample_position(struct gl_context *ctx,
case 8:
bits = brw_multisample_positions_8x[index >> 2] >> (8 * (index & 3));
break;
+ case 16:
+ bits = brw_multisample_positions_16x[index >> 2] >> (8 * (index & 3));
+ break;
default:
unreachable("Not implemented");
}
@@ -88,6 +91,17 @@ gen6_get_sample_position(struct gl_context *ctx,
* | 6 | 7 | | 7 | 1 |
* --------- ---------
*
+ * 16X MSAA sample index layout 16x MSAA sample number layout
+ * ----------------- -----------------
+ * | 0 | 1 | 2 | 3 | |15 |10 | 9 | 7 |
+ * ----------------- -----------------
+ * | 4 | 5 | 6 | 7 | | 4 | 1 | 3 |13 |
+ * ----------------- -----------------
+ * | 8 | 9 |10 |11 | |12 | 2 | 0 | 6 |
+ * ----------------- -----------------
+ * |12 |13 |14 |15 | |11 | 8 | 5 |14 |
+ * ----------------- -----------------
+ *
* A sample map is used to map sample indices to sample numbers.
*/
void
@@ -96,10 +110,13 @@ gen6_set_sample_maps(struct gl_context *ctx)
uint8_t map_2x[2] = {0, 1};
uint8_t map_4x[4] = {0, 1, 2, 3};
uint8_t map_8x[8] = {5, 2, 4, 6, 0, 3, 7, 1};
+ uint8_t map_16x[16] = { 15, 10, 9, 7, 4, 1, 3, 13,
+ 12, 2, 0, 6, 11, 8, 5, 14 };
memcpy(ctx->Const.SampleMap2x, map_2x, sizeof(map_2x));
memcpy(ctx->Const.SampleMap4x, map_4x, sizeof(map_4x));
memcpy(ctx->Const.SampleMap8x, map_8x, sizeof(map_8x));
+ memcpy(ctx->Const.SampleMap16x, map_16x, sizeof(map_16x));
}
/**
diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
index 3899ce9451f..2f6eadffd2e 100644
--- a/src/mesa/drivers/dri/i965/gen6_sol.c
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -131,7 +131,7 @@ brw_gs_upload_binding_table(struct brw_context *brw)
}
if (!need_binding_table) {
if (brw->ff_gs.bind_bo_offset != 0) {
- brw->ctx.NewDriverState |= BRW_NEW_GS_BINDING_TABLE;
+ brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
brw->ff_gs.bind_bo_offset = 0;
}
return;
@@ -162,7 +162,7 @@ brw_gs_upload_binding_table(struct brw_context *brw)
if (!need_binding_table) {
if (brw->gs.base.bind_bo_offset != 0) {
brw->gs.base.bind_bo_offset = 0;
- brw->ctx.NewDriverState |= BRW_NEW_GS_BINDING_TABLE;
+ brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
}
return;
}
@@ -179,7 +179,7 @@ brw_gs_upload_binding_table(struct brw_context *brw)
BRW_MAX_SURFACES * sizeof(uint32_t));
}
- brw->ctx.NewDriverState |= BRW_NEW_GS_BINDING_TABLE;
+ brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
}
const struct brw_tracked_state gen6_gs_binding_table = {
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 5080f1c3fe4..438caefdd4a 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -78,7 +78,7 @@ gen7_surface_msaa_bits(unsigned num_samples, enum intel_msaa_layout layout)
{
uint32_t ss4 = 0;
- assert(num_samples <= 8);
+ assert(num_samples <= 16);
/* The SURFACE_MULTISAMPLECOUNT_X enums are simply log2(num_samples) << 3. */
ss4 |= (ffs(MAX2(num_samples, 1)) - 1) << 3;
diff --git a/src/mesa/drivers/dri/i965/gen8_multisample_state.c b/src/mesa/drivers/dri/i965/gen8_multisample_state.c
index 75cbe06c522..4427f15996d 100644
--- a/src/mesa/drivers/dri/i965/gen8_multisample_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_multisample_state.c
@@ -52,13 +52,11 @@ gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
BEGIN_BATCH(9);
OUT_BATCH(_3DSTATE_SAMPLE_PATTERN << 16 | (9 - 2));
- /* 16x MSAA
- * XXX: Need to program these.
- */
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
+ /* 16x MSAA */
+ OUT_BATCH(brw_multisample_positions_16x[0]); /* positions 3, 2, 1, 0 */
+ OUT_BATCH(brw_multisample_positions_16x[1]); /* positions 7, 6, 5, 4 */
+ OUT_BATCH(brw_multisample_positions_16x[2]); /* positions 11, 10, 9, 8 */
+ OUT_BATCH(brw_multisample_positions_16x[3]); /* positions 15, 14, 13, 12 */
/* 8x MSAA */
OUT_BATCH(brw_multisample_positions_8x[1]); /* sample positions 7654 */
diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.c b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
index b3d6324a5fe..fdd605a7db0 100644
--- a/src/mesa/drivers/dri/i965/intel_asm_annotation.c
+++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
@@ -23,12 +23,8 @@
#include "brw_cfg.h"
#include "brw_eu.h"
-#include "brw_context.h"
#include "intel_debug.h"
#include "intel_asm_annotation.h"
-#include "program/prog_print.h"
-#include "program/prog_instruction.h"
-#include "main/macros.h"
#include "glsl/nir/nir.h"
void
@@ -69,6 +65,10 @@ dump_assembly(void *assembly, int num_annotations, struct annotation *annotation
brw_disassemble(devinfo, assembly, start_offset, end_offset, stderr);
+ if (annotation[i].error) {
+ fputs(annotation[i].error, stderr);
+ }
+
if (annotation[i].block_end) {
fprintf(stderr, " END B%d", annotation[i].block_end->num);
foreach_list_typed(struct bblock_link, successor_link, link,
@@ -82,9 +82,8 @@ dump_assembly(void *assembly, int num_annotations, struct annotation *annotation
fprintf(stderr, "\n");
}
-void annotate(const struct brw_device_info *devinfo,
- struct annotation_info *annotation, const struct cfg_t *cfg,
- struct backend_instruction *inst, unsigned offset)
+static bool
+annotation_array_ensure_space(struct annotation_info *annotation)
{
if (annotation->ann_size <= annotation->ann_count) {
int old_size = annotation->ann_size;
@@ -92,12 +91,25 @@ void annotate(const struct brw_device_info *devinfo,
annotation->ann = reralloc(annotation->mem_ctx, annotation->ann,
struct annotation, annotation->ann_size);
if (!annotation->ann)
- return;
+ return false;
memset(annotation->ann + old_size, 0,
(annotation->ann_size - old_size) * sizeof(struct annotation));
}
+ return true;
+}
+
+void annotate(const struct brw_device_info *devinfo,
+ struct annotation_info *annotation, const struct cfg_t *cfg,
+ struct backend_instruction *inst, unsigned offset)
+{
+ if (annotation->mem_ctx == NULL)
+ annotation->mem_ctx = ralloc_context(NULL);
+
+ if (!annotation_array_ensure_space(annotation))
+ return;
+
struct annotation *ann = &annotation->ann[annotation->ann_count++];
ann->offset = offset;
if ((INTEL_DEBUG & DEBUG_ANNOTATION) != 0) {
@@ -109,6 +121,24 @@ void annotate(const struct brw_device_info *devinfo,
ann->block_start = cfg->blocks[annotation->cur_block];
}
+ if (bblock_end(cfg->blocks[annotation->cur_block]) == inst) {
+ ann->block_end = cfg->blocks[annotation->cur_block];
+ annotation->cur_block++;
+ }
+
+ /* Merge this annotation with the previous if possible. */
+ struct annotation *prev = annotation->ann_count > 1 ?
+ &annotation->ann[annotation->ann_count - 2] : NULL;
+ if (prev != NULL &&
+ ann->ir == prev->ir &&
+ ann->annotation == prev->annotation &&
+ ann->block_start == NULL &&
+ prev->block_end == NULL) {
+ if (ann->block_end == NULL)
+ annotation->ann_count--;
+ return;
+ }
+
/* There is no hardware DO instruction on Gen6+, so since DO always
* starts a basic block, we need to set the .block_start of the next
* instruction's annotation with a pointer to the bblock started by
@@ -120,11 +150,6 @@ void annotate(const struct brw_device_info *devinfo,
if (devinfo->gen >= 6 && inst->opcode == BRW_OPCODE_DO) {
annotation->ann_count--;
}
-
- if (bblock_end(cfg->blocks[annotation->cur_block]) == inst) {
- ann->block_end = cfg->blocks[annotation->cur_block];
- annotation->cur_block++;
- }
}
void
@@ -140,3 +165,47 @@ annotation_finalize(struct annotation_info *annotation,
}
annotation->ann[annotation->ann_count].offset = next_inst_offset;
}
+
+/**
+ * Attach an error message to the instruction at byte \p offset.
+ *
+ * Finds the annotation whose instruction range contains \p offset. If that
+ * instruction is not the last one of the range, the annotation is split so
+ * that the first half ends right after the instruction. The message is then
+ * stored on (or appended to) that annotation's error string, which
+ * dump_assembly() prints after disassembling the range.
+ *
+ * Does nothing when there are no annotations or when the array cannot be
+ * grown.
+ *
+ * \param annotation  annotation array to update
+ * \param offset      byte offset of the offending instruction
+ * \param error       message text; it is copied, not retained
+ */
+void
+annotation_insert_error(struct annotation_info *annotation, unsigned offset,
+                        const char *error)
+{
+   struct annotation *ann;
+
+   if (!annotation->ann_count)
+      return;
+
+   /* We may have to split an annotation, so ensure we have enough space
+    * allocated for that case up front.
+    *
+    * annotation_finalize() keeps an end-of-program sentinel in
+    * ann[ann_count], so a split needs ann_count + 2 slots in total. Count
+    * the sentinel while asking for space so that room is reserved for both
+    * the sentinel and the entry the split adds.
+    */
+   annotation->ann_count++;
+   const bool have_space = annotation_array_ensure_space(annotation);
+   annotation->ann_count--;
+   if (!have_space)
+      return;
+
+   /* Tells the compiler the loop below runs at least once, so 'ann' is
+    * always assigned before it is used.
+    */
+   assume(annotation->ann_count > 0);
+
+   for (int i = 0; i < annotation->ann_count; i++) {
+      struct annotation *cur = &annotation->ann[i];
+      struct annotation *next = &annotation->ann[i + 1];
+      ann = cur;
+
+      /* The instruction lives in a later annotation. */
+      if (next->offset <= offset)
+         continue;
+
+      if (offset + sizeof(brw_inst) != next->offset) {
+         /* Shift entries i..ann_count (the last one being the sentinel) up
+          * by one slot; that is ann_count - i + 1 entries. Copying more
+          * would read and write past the live part of the array.
+          */
+         memmove(next, cur,
+                 (annotation->ann_count - i + 1) * sizeof(struct annotation));
+         cur->error = NULL;
+         cur->error_length = 0;
+         cur->block_end = NULL;
+         next->offset = offset + sizeof(brw_inst);
+         next->block_start = NULL;
+         annotation->ann_count++;
+      }
+      break;
+   }
+
+   if (ann->error)
+      ralloc_strcat(&ann->error, error);
+   else
+      ann->error = ralloc_strdup(annotation->mem_ctx, error);
+}
diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.h b/src/mesa/drivers/dri/i965/intel_asm_annotation.h
index 6c72326f058..662a4b4e0f7 100644
--- a/src/mesa/drivers/dri/i965/intel_asm_annotation.h
+++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.h
@@ -37,6 +37,9 @@ struct cfg_t;
struct annotation {
int offset;
+ size_t error_length;
+ char *error;
+
/* Pointers to the basic block in the CFG if the instruction group starts
* or ends a basic block.
*/
@@ -69,6 +72,10 @@ annotate(const struct brw_device_info *devinfo,
void
annotation_finalize(struct annotation_info *annotation, unsigned offset);
+void
+annotation_insert_error(struct annotation_info *annotation, unsigned offset,
+ const char *error);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 4643ea3e87b..386b63c123d 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -174,6 +174,7 @@ intelInitExtensions(struct gl_context *ctx)
assert(brw->gen >= 4);
+ ctx->Extensions.ARB_arrays_of_arrays = true;
ctx->Extensions.ARB_buffer_storage = true;
ctx->Extensions.ARB_clear_texture = true;
ctx->Extensions.ARB_clip_control = true;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b6e35205727..b1a7632d82f 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -416,9 +416,13 @@ intel_miptree_create_layout(struct brw_context *brw,
width0 = ALIGN(width0, 2) * 4;
height0 = ALIGN(height0, 2) * 2;
break;
+ case 16:
+ width0 = ALIGN(width0, 2) * 4;
+ height0 = ALIGN(height0, 2) * 4;
+ break;
default:
- /* num_samples should already have been quantized to 0, 1, 2, 4, or
- * 8.
+ /* num_samples should already have been quantized to 0, 1, 2, 4, 8
+ * or 16.
*/
unreachable("not reached");
}
@@ -1423,6 +1427,12 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
*/
format = MESA_FORMAT_R_UINT32;
break;
+ case 16:
+ /* 64 bits/pixel are required for MCS data when using 16x MSAA (4 bits
+ * for each sample).
+ */
+ format = MESA_FORMAT_RG_UINT32;
+ break;
default:
unreachable("Unrecognized sample count in intel_miptree_alloc_mcs");
};
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index fb95fb629ad..d64ebade769 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1178,12 +1178,15 @@ intel_detect_timestamp(struct intel_screen *screen)
const int*
intel_supported_msaa_modes(const struct intel_screen *screen)
{
+ static const int gen9_modes[] = {16, 8, 4, 2, 0, -1};
static const int gen8_modes[] = {8, 4, 2, 0, -1};
static const int gen7_modes[] = {8, 4, 0, -1};
static const int gen6_modes[] = {4, 0, -1};
static const int gen4_modes[] = {0, -1};
- if (screen->devinfo->gen >= 8) {
+ if (screen->devinfo->gen >= 9) {
+ return gen9_modes;
+ } else if (screen->devinfo->gen >= 8) {
return gen8_modes;
} else if (screen->devinfo->gen >= 7) {
return gen7_modes;
diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
index e80b71b558d..a1f91d9c56a 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
@@ -144,7 +144,7 @@ TEST_F(copy_propagation_test, test_swizzle_swizzle)
copy_propagation(v);
- EXPECT_EQ(test_mov->src[0].reg, a.reg);
+ EXPECT_EQ(test_mov->src[0].nr, a.nr);
EXPECT_EQ(test_mov->src[0].swizzle, BRW_SWIZZLE4(SWIZZLE_Z,
SWIZZLE_W,
SWIZZLE_X,
@@ -174,7 +174,7 @@ TEST_F(copy_propagation_test, test_swizzle_writemask)
copy_propagation(v);
/* should not copy propagate */
- EXPECT_EQ(test_mov->src[0].reg, b.reg);
+ EXPECT_EQ(test_mov->src[0].nr, b.nr);
EXPECT_EQ(test_mov->src[0].swizzle, BRW_SWIZZLE4(SWIZZLE_W,
SWIZZLE_W,
SWIZZLE_W,
diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
index 2f824617454..d84e2e98ec0 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -213,7 +213,7 @@ TEST_F(register_coalesce_test, test_dp4_grf)
register_coalesce(v);
- EXPECT_EQ(dp4->dst.reg, to.reg);
+ EXPECT_EQ(dp4->dst.nr, to.nr);
EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
}
@@ -239,5 +239,5 @@ TEST_F(register_coalesce_test, test_channel_mul_grf)
register_coalesce(v);
- EXPECT_EQ(mul->dst.reg, to.reg);
+ EXPECT_EQ(mul->dst.nr, to.nr);
}
diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
index 061e557a397..897dac6e5db 100644
--- a/src/mesa/main/arrayobj.c
+++ b/src/mesa/main/arrayobj.c
@@ -149,8 +149,6 @@ unbind_array_object_vbos(struct gl_context *ctx, struct gl_vertex_array_object *
/**
* Allocate and initialize a new vertex array object.
- *
- * This function is intended to be called via
*/
struct gl_vertex_array_object *
_mesa_new_vao(struct gl_context *ctx, GLuint name)
@@ -164,9 +162,6 @@ _mesa_new_vao(struct gl_context *ctx, GLuint name)
/**
* Delete an array object.
- *
- * This function is intended to be called via
- * \c dd_function_table::DeleteArrayObject.
*/
void
_mesa_delete_vao(struct gl_context *ctx, struct gl_vertex_array_object *obj)
diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c
index 20aa4980935..ddf7f497f1e 100644
--- a/src/mesa/main/blend.c
+++ b/src/mesa/main/blend.c
@@ -639,7 +639,7 @@ _mesa_AlphaFunc( GLenum func, GLclampf ref )
* \param opcode operation.
*
* Verifies that \p opcode is a valid enum and updates
-gl_colorbuffer_attrib::LogicOp.
+ * gl_colorbuffer_attrib::LogicOp.
* On a change, flushes the vertices and notifies the driver via the
* dd_function_table::LogicOpcode callback.
*/
diff --git a/src/mesa/main/context.h b/src/mesa/main/context.h
index 1e7a12c8a84..4798b1f9b43 100644
--- a/src/mesa/main/context.h
+++ b/src/mesa/main/context.h
@@ -50,6 +50,7 @@
#include "imports.h"
+#include "extensions.h"
#include "mtypes.h"
#include "vbo/vbo.h"
diff --git a/src/mesa/main/copyimage.c b/src/mesa/main/copyimage.c
index f02e842f34d..d571d221bce 100644
--- a/src/mesa/main/copyimage.c
+++ b/src/mesa/main/copyimage.c
@@ -62,6 +62,8 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum target,
struct gl_renderbuffer **renderbuffer,
mesa_format *format,
GLenum *internalFormat,
+ GLuint *width,
+ GLuint *height,
const char *dbg_prefix)
{
if (name == 0) {
@@ -126,6 +128,8 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum target,
*renderbuffer = rb;
*format = rb->Format;
*internalFormat = rb->InternalFormat;
+ *width = rb->Width;
+ *height = rb->Height;
*tex_image = NULL;
} else {
struct gl_texture_object *texObj = _mesa_lookup_texture(ctx, name);
@@ -194,6 +198,8 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum target,
*renderbuffer = NULL;
*format = (*tex_image)->TexFormat;
*internalFormat = (*tex_image)->InternalFormat;
+ *width = (*tex_image)->Width;
+ *height = (*tex_image)->Height;
}
return true;
@@ -423,6 +429,7 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel,
struct gl_renderbuffer *srcRenderbuffer, *dstRenderbuffer;
mesa_format srcFormat, dstFormat;
GLenum srcIntFormat, dstIntFormat;
+ GLuint src_w, src_h, dst_w, dst_h;
GLuint src_bw, src_bh, dst_bw, dst_bh;
int dstWidth, dstHeight, dstDepth;
int i;
@@ -445,17 +452,41 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel,
if (!prepare_target(ctx, srcName, srcTarget, srcLevel, srcZ, srcDepth,
&srcTexImage, &srcRenderbuffer, &srcFormat,
- &srcIntFormat, "src"))
+ &srcIntFormat, &src_w, &src_h, "src"))
return;
if (!prepare_target(ctx, dstName, dstTarget, dstLevel, dstZ, srcDepth,
&dstTexImage, &dstRenderbuffer, &dstFormat,
- &dstIntFormat, "dst"))
+ &dstIntFormat, &dst_w, &dst_h, "dst"))
return;
_mesa_get_format_block_size(srcFormat, &src_bw, &src_bh);
+
+ /* Section 18.3.2 (Copying Between Images) of the OpenGL 4.5 Core Profile
+ * spec says:
+ *
+ * An INVALID_VALUE error is generated if the dimensions of either
+ * subregion exceeds the boundaries of the corresponding image object,
+ * or if the image format is compressed and the dimensions of the
+ * subregion fail to meet the alignment constraints of the format.
+ *
+ * and Section 8.7 (Compressed Texture Images) says:
+ *
+ * An INVALID_OPERATION error is generated if any of the following
+ * conditions occurs:
+ *
+ * * width is not a multiple of four, and width + xoffset is not
+ * equal to the value of TEXTURE_WIDTH.
+ * * height is not a multiple of four, and height + yoffset is not
+ * equal to the value of TEXTURE_HEIGHT.
+ *
+ * so we take that to mean that you can copy the "last" block of a
+ * compressed texture image even if it's smaller than the minimum block
+ * dimensions.
+ */
if ((srcX % src_bw != 0) || (srcY % src_bh != 0) ||
- (srcWidth % src_bw != 0) || (srcHeight % src_bh != 0)) {
+ (srcWidth % src_bw != 0 && (srcX + srcWidth) != src_w) ||
+ (srcHeight % src_bh != 0 && (srcY + srcHeight) != src_h)) {
_mesa_error(ctx, GL_INVALID_VALUE,
"glCopyImageSubData(unaligned src rectangle)");
return;
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index d964f030ecb..e94d2b74749 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -42,35 +42,6 @@ struct gl_extensions _mesa_extension_override_disables;
static char *extra_extensions = NULL;
static char *cant_disable_extensions = NULL;
-enum {
- DISABLE = 0,
- GLL = 1 << API_OPENGL_COMPAT, /* GL Legacy / Compatibility */
- GLC = 1 << API_OPENGL_CORE, /* GL Core */
- GL = (1 << API_OPENGL_COMPAT) | (1 << API_OPENGL_CORE),
- ES1 = 1 << API_OPENGLES,
- ES2 = 1 << API_OPENGLES2,
- ES3 = 1 << (API_OPENGL_LAST + 1),
- ES31 = 1 << (API_OPENGL_LAST + 2),
-};
-
-/**
- * \brief An element of the \c extension_table.
- */
-struct extension {
- /** Name of extension, such as "GL_ARB_depth_clamp". */
- const char *name;
-
- /** Offset (in bytes) of the corresponding member in struct gl_extensions. */
- size_t offset;
-
- /** Set of API's in which the extension exists, as a bitset. */
- uint8_t api_set;
-
- /** Year the extension was proposed or approved. Used to sort the
- * extension string chronologically. */
- uint16_t year;
-};
-
/**
* Given a member \c x of struct gl_extensions, return offset of
@@ -82,341 +53,26 @@ struct extension {
/**
* \brief Table of supported OpenGL extensions for all API's.
*/
-static const struct extension extension_table[] = {
- /* ARB Extensions */
- { "GL_ARB_ES2_compatibility", o(ARB_ES2_compatibility), GL, 2009 },
- { "GL_ARB_ES3_compatibility", o(ARB_ES3_compatibility), GL, 2012 },
- { "GL_ARB_arrays_of_arrays", o(ARB_arrays_of_arrays), GL, 2012 },
- { "GL_ARB_base_instance", o(ARB_base_instance), GL, 2011 },
- { "GL_ARB_blend_func_extended", o(ARB_blend_func_extended), GL, 2009 },
- { "GL_ARB_buffer_storage", o(ARB_buffer_storage), GL, 2013 },
- { "GL_ARB_clear_buffer_object", o(dummy_true), GL, 2012 },
- { "GL_ARB_clear_texture", o(ARB_clear_texture), GL, 2013 },
- { "GL_ARB_clip_control", o(ARB_clip_control), GL, 2014 },
- { "GL_ARB_color_buffer_float", o(ARB_color_buffer_float), GL, 2004 },
- { "GL_ARB_compressed_texture_pixel_storage", o(dummy_true), GL, 2011 },
- { "GL_ARB_compute_shader", o(ARB_compute_shader), GL, 2012 },
- { "GL_ARB_conditional_render_inverted", o(ARB_conditional_render_inverted), GL, 2014 },
- { "GL_ARB_copy_buffer", o(dummy_true), GL, 2008 },
- { "GL_ARB_copy_image", o(ARB_copy_image), GL, 2012 },
- { "GL_ARB_conservative_depth", o(ARB_conservative_depth), GL, 2011 },
- { "GL_ARB_debug_output", o(dummy_true), GL, 2009 },
- { "GL_ARB_depth_buffer_float", o(ARB_depth_buffer_float), GL, 2008 },
- { "GL_ARB_depth_clamp", o(ARB_depth_clamp), GL, 2003 },
- { "GL_ARB_depth_texture", o(ARB_depth_texture), GLL, 2001 },
- { "GL_ARB_derivative_control", o(ARB_derivative_control), GL, 2014 },
- { "GL_ARB_direct_state_access", o(dummy_true), GLC, 2014 },
- { "GL_ARB_draw_buffers", o(dummy_true), GL, 2002 },
- { "GL_ARB_draw_buffers_blend", o(ARB_draw_buffers_blend), GL, 2009 },
- { "GL_ARB_draw_elements_base_vertex", o(ARB_draw_elements_base_vertex), GL, 2009 },
- { "GL_ARB_draw_indirect", o(ARB_draw_indirect), GLC, 2010 },
- { "GL_ARB_draw_instanced", o(ARB_draw_instanced), GL, 2008 },
- { "GL_ARB_explicit_attrib_location", o(ARB_explicit_attrib_location), GL, 2009 },
- { "GL_ARB_explicit_uniform_location", o(ARB_explicit_uniform_location), GL, 2012 },
- { "GL_ARB_fragment_coord_conventions", o(ARB_fragment_coord_conventions), GL, 2009 },
- { "GL_ARB_fragment_layer_viewport", o(ARB_fragment_layer_viewport), GLC, 2012 },
- { "GL_ARB_fragment_program", o(ARB_fragment_program), GLL, 2002 },
- { "GL_ARB_fragment_program_shadow", o(ARB_fragment_program_shadow), GLL, 2003 },
- { "GL_ARB_fragment_shader", o(ARB_fragment_shader), GL, 2002 },
- { "GL_ARB_framebuffer_no_attachments", o(ARB_framebuffer_no_attachments), GL, 2012 },
- { "GL_ARB_framebuffer_object", o(ARB_framebuffer_object), GL, 2005 },
- { "GL_ARB_framebuffer_sRGB", o(EXT_framebuffer_sRGB), GL, 1998 },
- { "GL_ARB_get_program_binary", o(dummy_true), GL, 2010 },
- { "GL_ARB_get_texture_sub_image", o(dummy_true), GL, 2014 },
- { "GL_ARB_gpu_shader5", o(ARB_gpu_shader5), GLC, 2010 },
- { "GL_ARB_gpu_shader_fp64", o(ARB_gpu_shader_fp64), GLC, 2010 },
- { "GL_ARB_half_float_pixel", o(dummy_true), GL, 2003 },
- { "GL_ARB_half_float_vertex", o(ARB_half_float_vertex), GL, 2008 },
- { "GL_ARB_instanced_arrays", o(ARB_instanced_arrays), GL, 2008 },
- { "GL_ARB_internalformat_query", o(ARB_internalformat_query), GL, 2011 },
- { "GL_ARB_invalidate_subdata", o(dummy_true), GL, 2012 },
- { "GL_ARB_map_buffer_alignment", o(dummy_true), GL, 2011 },
- { "GL_ARB_map_buffer_range", o(ARB_map_buffer_range), GL, 2008 },
- { "GL_ARB_multi_bind", o(dummy_true), GL, 2013 },
- { "GL_ARB_multi_draw_indirect", o(ARB_draw_indirect), GLC, 2012 },
- { "GL_ARB_multisample", o(dummy_true), GLL, 1994 },
- { "GL_ARB_multitexture", o(dummy_true), GLL, 1998 },
- { "GL_ARB_occlusion_query2", o(ARB_occlusion_query2), GL, 2003 },
- { "GL_ARB_occlusion_query", o(ARB_occlusion_query), GLL, 2001 },
- { "GL_ARB_pipeline_statistics_query", o(ARB_pipeline_statistics_query), GL, 2014 },
- { "GL_ARB_pixel_buffer_object", o(EXT_pixel_buffer_object), GL, 2004 },
- { "GL_ARB_point_parameters", o(EXT_point_parameters), GLL, 1997 },
- { "GL_ARB_point_sprite", o(ARB_point_sprite), GL, 2003 },
- { "GL_ARB_program_interface_query", o(dummy_true), GL, 2012 },
- { "GL_ARB_provoking_vertex", o(EXT_provoking_vertex), GL, 2009 },
- { "GL_ARB_robustness", o(dummy_true), GL, 2010 },
- { "GL_ARB_sample_shading", o(ARB_sample_shading), GL, 2009 },
- { "GL_ARB_sampler_objects", o(dummy_true), GL, 2009 },
- { "GL_ARB_seamless_cube_map", o(ARB_seamless_cube_map), GL, 2009 },
- { "GL_ARB_seamless_cubemap_per_texture", o(AMD_seamless_cubemap_per_texture), GL, 2013 },
- { "GL_ARB_separate_shader_objects", o(dummy_true), GL, 2010 },
- { "GL_ARB_shader_atomic_counters", o(ARB_shader_atomic_counters), GL, 2011 },
- { "GL_ARB_shader_bit_encoding", o(ARB_shader_bit_encoding), GL, 2010 },
- { "GL_ARB_shader_clock", o(ARB_shader_clock), GL, 2015 },
- { "GL_ARB_shader_image_load_store", o(ARB_shader_image_load_store), GL, 2011 },
- { "GL_ARB_shader_image_size", o(ARB_shader_image_size), GL, 2012 },
- { "GL_ARB_shader_objects", o(dummy_true), GL, 2002 },
- { "GL_ARB_shader_precision", o(ARB_shader_precision), GL, 2010 },
- { "GL_ARB_shader_stencil_export", o(ARB_shader_stencil_export), GL, 2009 },
- { "GL_ARB_shader_storage_buffer_object", o(ARB_shader_storage_buffer_object), GL, 2012 },
- { "GL_ARB_shader_subroutine", o(ARB_shader_subroutine), GLC, 2010 },
- { "GL_ARB_shader_texture_image_samples", o(ARB_shader_texture_image_samples), GL, 2014 },
- { "GL_ARB_shader_texture_lod", o(ARB_shader_texture_lod), GL, 2009 },
- { "GL_ARB_shading_language_100", o(dummy_true), GLL, 2003 },
- { "GL_ARB_shading_language_packing", o(ARB_shading_language_packing), GL, 2011 },
- { "GL_ARB_shading_language_420pack", o(ARB_shading_language_420pack), GL, 2011 },
- { "GL_ARB_shadow", o(ARB_shadow), GLL, 2001 },
- { "GL_ARB_stencil_texturing", o(ARB_stencil_texturing), GL, 2012 },
- { "GL_ARB_sync", o(ARB_sync), GL, 2003 },
- { "GL_ARB_texture_barrier", o(NV_texture_barrier), GL, 2014 },
- { "GL_ARB_tessellation_shader", o(ARB_tessellation_shader), GLC, 2009 },
- { "GL_ARB_texture_border_clamp", o(ARB_texture_border_clamp), GLL, 2000 },
- { "GL_ARB_texture_buffer_object", o(ARB_texture_buffer_object), GLC, 2008 },
- { "GL_ARB_texture_buffer_object_rgb32", o(ARB_texture_buffer_object_rgb32), GLC, 2009 },
- { "GL_ARB_texture_buffer_range", o(ARB_texture_buffer_range), GLC, 2012 },
- { "GL_ARB_texture_compression", o(dummy_true), GLL, 2000 },
- { "GL_ARB_texture_compression_bptc", o(ARB_texture_compression_bptc), GL, 2010 },
- { "GL_ARB_texture_compression_rgtc", o(ARB_texture_compression_rgtc), GL, 2004 },
- { "GL_ARB_texture_cube_map", o(ARB_texture_cube_map), GLL, 1999 },
- { "GL_ARB_texture_cube_map_array", o(ARB_texture_cube_map_array), GL, 2009 },
- { "GL_ARB_texture_env_add", o(dummy_true), GLL, 1999 },
- { "GL_ARB_texture_env_combine", o(ARB_texture_env_combine), GLL, 2001 },
- { "GL_ARB_texture_env_crossbar", o(ARB_texture_env_crossbar), GLL, 2001 },
- { "GL_ARB_texture_env_dot3", o(ARB_texture_env_dot3), GLL, 2001 },
- { "GL_ARB_texture_float", o(ARB_texture_float), GL, 2004 },
- { "GL_ARB_texture_gather", o(ARB_texture_gather), GL, 2009 },
- { "GL_ARB_texture_mirrored_repeat", o(dummy_true), GLL, 2001 },
- { "GL_ARB_texture_mirror_clamp_to_edge", o(ARB_texture_mirror_clamp_to_edge), GL, 2013 },
- { "GL_ARB_texture_multisample", o(ARB_texture_multisample), GL, 2009 },
- { "GL_ARB_texture_non_power_of_two", o(ARB_texture_non_power_of_two), GL, 2003 },
- { "GL_ARB_texture_query_levels", o(ARB_texture_query_levels), GL, 2012 },
- { "GL_ARB_texture_query_lod", o(ARB_texture_query_lod), GL, 2009 },
- { "GL_ARB_texture_rectangle", o(NV_texture_rectangle), GL, 2004 },
- { "GL_ARB_texture_rgb10_a2ui", o(ARB_texture_rgb10_a2ui), GL, 2009 },
- { "GL_ARB_texture_rg", o(ARB_texture_rg), GL, 2008 },
- { "GL_ARB_texture_stencil8", o(ARB_texture_stencil8), GL, 2013 },
- { "GL_ARB_texture_storage", o(dummy_true), GL, 2011 },
- { "GL_ARB_texture_storage_multisample", o(ARB_texture_multisample), GL, 2012 },
- { "GL_ARB_texture_view", o(ARB_texture_view), GL, 2012 },
- { "GL_ARB_texture_swizzle", o(EXT_texture_swizzle), GL, 2008 },
- { "GL_ARB_timer_query", o(ARB_timer_query), GL, 2010 },
- { "GL_ARB_transform_feedback2", o(ARB_transform_feedback2), GL, 2010 },
- { "GL_ARB_transform_feedback3", o(ARB_transform_feedback3), GL, 2010 },
- { "GL_ARB_transform_feedback_instanced", o(ARB_transform_feedback_instanced), GL, 2011 },
- { "GL_ARB_transpose_matrix", o(dummy_true), GLL, 1999 },
- { "GL_ARB_uniform_buffer_object", o(ARB_uniform_buffer_object), GL, 2009 },
- { "GL_ARB_vertex_array_bgra", o(EXT_vertex_array_bgra), GL, 2008 },
- { "GL_ARB_vertex_array_object", o(dummy_true), GL, 2006 },
- { "GL_ARB_vertex_attrib_binding", o(dummy_true), GL, 2012 },
- { "GL_ARB_vertex_buffer_object", o(dummy_true), GLL, 2003 },
- { "GL_ARB_vertex_program", o(ARB_vertex_program), GLL, 2002 },
- { "GL_ARB_vertex_shader", o(ARB_vertex_shader), GL, 2002 },
- { "GL_ARB_vertex_attrib_64bit", o(ARB_vertex_attrib_64bit), GLC, 2010 },
- { "GL_ARB_vertex_type_10f_11f_11f_rev", o(ARB_vertex_type_10f_11f_11f_rev), GL, 2013 },
- { "GL_ARB_vertex_type_2_10_10_10_rev", o(ARB_vertex_type_2_10_10_10_rev), GL, 2009 },
- { "GL_ARB_viewport_array", o(ARB_viewport_array), GLC, 2010 },
- { "GL_ARB_window_pos", o(dummy_true), GLL, 2001 },
- /* EXT extensions */
- { "GL_EXT_abgr", o(dummy_true), GL, 1995 },
- { "GL_EXT_bgra", o(dummy_true), GLL, 1995 },
- { "GL_EXT_blend_color", o(EXT_blend_color), GLL, 1995 },
- { "GL_EXT_blend_equation_separate", o(EXT_blend_equation_separate), GL, 2003 },
- { "GL_EXT_blend_func_separate", o(EXT_blend_func_separate), GLL, 1999 },
- { "GL_EXT_discard_framebuffer", o(dummy_true), ES1 | ES2, 2009 },
- { "GL_EXT_blend_minmax", o(EXT_blend_minmax), GLL | ES1 | ES2, 1995 },
- { "GL_EXT_blend_subtract", o(dummy_true), GLL, 1995 },
- { "GL_EXT_compiled_vertex_array", o(dummy_true), GLL, 1996 },
- { "GL_EXT_copy_texture", o(dummy_true), GLL, 1995 },
- { "GL_EXT_depth_bounds_test", o(EXT_depth_bounds_test), GL, 2002 },
- { "GL_EXT_draw_buffers", o(dummy_true), ES2, 2012 },
- { "GL_EXT_draw_buffers2", o(EXT_draw_buffers2), GL, 2006 },
- { "GL_EXT_draw_elements_base_vertex", o(ARB_draw_elements_base_vertex), ES2, 2014 },
- { "GL_EXT_draw_instanced", o(ARB_draw_instanced), GL, 2006 },
- { "GL_EXT_draw_range_elements", o(dummy_true), GLL, 1997 },
- { "GL_EXT_fog_coord", o(dummy_true), GLL, 1999 },
- { "GL_EXT_framebuffer_blit", o(dummy_true), GL, 2005 },
- { "GL_EXT_framebuffer_multisample", o(EXT_framebuffer_multisample), GL, 2005 },
- { "GL_EXT_framebuffer_multisample_blit_scaled", o(EXT_framebuffer_multisample_blit_scaled), GL, 2011 },
- { "GL_EXT_framebuffer_object", o(dummy_true), GLL, 2000 },
- { "GL_EXT_framebuffer_sRGB", o(EXT_framebuffer_sRGB), GL, 1998 },
- { "GL_EXT_gpu_program_parameters", o(EXT_gpu_program_parameters), GLL, 2006 },
- { "GL_EXT_gpu_shader4", o(EXT_gpu_shader4), GL, 2006 },
- { "GL_EXT_map_buffer_range", o(ARB_map_buffer_range), ES1 | ES2, 2012 },
- { "GL_EXT_multi_draw_arrays", o(dummy_true), GLL | ES1 | ES2, 1999 },
- { "GL_EXT_packed_depth_stencil", o(dummy_true), GL, 2005 },
- { "GL_EXT_packed_float", o(EXT_packed_float), GL, 2004 },
- { "GL_EXT_packed_pixels", o(dummy_true), GLL, 1997 },
- { "GL_EXT_pixel_buffer_object", o(EXT_pixel_buffer_object), GL, 2004 },
- { "GL_EXT_point_parameters", o(EXT_point_parameters), GLL, 1997 },
- { "GL_EXT_polygon_offset", o(dummy_true), GLL, 1995 },
- { "GL_EXT_polygon_offset_clamp", o(EXT_polygon_offset_clamp), GL, 2014 },
- { "GL_EXT_provoking_vertex", o(EXT_provoking_vertex), GL, 2009 },
- { "GL_EXT_rescale_normal", o(dummy_true), GLL, 1997 },
- { "GL_EXT_secondary_color", o(dummy_true), GLL, 1999 },
- { "GL_EXT_separate_shader_objects", o(dummy_true), ES2, 2013 },
- { "GL_EXT_separate_specular_color", o(dummy_true), GLL, 1997 },
- { "GL_EXT_shader_integer_mix", o(EXT_shader_integer_mix), GL | ES3, 2013 },
- { "GL_EXT_shadow_funcs", o(ARB_shadow), GLL, 2002 },
- { "GL_EXT_stencil_two_side", o(EXT_stencil_two_side), GLL, 2001 },
- { "GL_EXT_stencil_wrap", o(dummy_true), GLL, 2002 },
- { "GL_EXT_subtexture", o(dummy_true), GLL, 1995 },
- { "GL_EXT_texture3D", o(EXT_texture3D), GLL, 1996 },
- { "GL_EXT_texture_array", o(EXT_texture_array), GL, 2006 },
- { "GL_EXT_texture_compression_dxt1", o(ANGLE_texture_compression_dxt), GL | ES1 | ES2, 2004 },
- { "GL_ANGLE_texture_compression_dxt3", o(ANGLE_texture_compression_dxt), GL | ES1 | ES2, 2011 },
- { "GL_ANGLE_texture_compression_dxt5", o(ANGLE_texture_compression_dxt), GL | ES1 | ES2, 2011 },
- { "GL_EXT_texture_compression_latc", o(EXT_texture_compression_latc), GLL, 2006 },
- { "GL_EXT_texture_compression_rgtc", o(ARB_texture_compression_rgtc), GL, 2004 },
- { "GL_EXT_texture_compression_s3tc", o(EXT_texture_compression_s3tc), GL, 2000 },
- { "GL_EXT_texture_cube_map", o(ARB_texture_cube_map), GLL, 2001 },
- { "GL_EXT_texture_edge_clamp", o(dummy_true), GLL, 1997 },
- { "GL_EXT_texture_env_add", o(dummy_true), GLL, 1999 },
- { "GL_EXT_texture_env_combine", o(dummy_true), GLL, 2000 },
- { "GL_EXT_texture_env_dot3", o(EXT_texture_env_dot3), GLL, 2000 },
- { "GL_EXT_texture_filter_anisotropic", o(EXT_texture_filter_anisotropic), GL | ES1 | ES2, 1999 },
- { "GL_EXT_texture_format_BGRA8888", o(dummy_true), ES1 | ES2, 2005 },
- { "GL_EXT_texture_rg", o(ARB_texture_rg), ES2, 2011 },
- { "GL_EXT_read_format_bgra", o(dummy_true), ES1 | ES2, 2009 },
- { "GL_EXT_texture_integer", o(EXT_texture_integer), GL, 2006 },
- { "GL_EXT_texture_lod_bias", o(dummy_true), GLL | ES1, 1999 },
- { "GL_EXT_texture_mirror_clamp", o(EXT_texture_mirror_clamp), GL, 2004 },
- { "GL_EXT_texture_object", o(dummy_true), GLL, 1995 },
- { "GL_EXT_texture", o(dummy_true), GLL, 1996 },
- { "GL_EXT_texture_rectangle", o(NV_texture_rectangle), GLL, 2004 },
- { "GL_EXT_texture_shared_exponent", o(EXT_texture_shared_exponent), GL, 2004 },
- { "GL_EXT_texture_snorm", o(EXT_texture_snorm), GL, 2009 },
- { "GL_EXT_texture_sRGB", o(EXT_texture_sRGB), GL, 2004 },
- { "GL_EXT_texture_sRGB_decode", o(EXT_texture_sRGB_decode), GL, 2006 },
- { "GL_EXT_texture_swizzle", o(EXT_texture_swizzle), GL, 2008 },
- { "GL_EXT_texture_type_2_10_10_10_REV", o(dummy_true), ES2, 2008 },
- { "GL_EXT_timer_query", o(EXT_timer_query), GL, 2006 },
- { "GL_EXT_transform_feedback", o(EXT_transform_feedback), GL, 2011 },
- { "GL_EXT_unpack_subimage", o(dummy_true), ES2, 2011 },
- { "GL_EXT_vertex_array_bgra", o(EXT_vertex_array_bgra), GL, 2008 },
- { "GL_EXT_vertex_array", o(dummy_true), GLL, 1995 },
- { "GL_EXT_color_buffer_float", o(dummy_true), ES3, 2013 },
-
- /* OES extensions */
- { "GL_OES_blend_equation_separate", o(EXT_blend_equation_separate), ES1, 2009 },
- { "GL_OES_blend_func_separate", o(EXT_blend_func_separate), ES1, 2009 },
- { "GL_OES_blend_subtract", o(dummy_true), ES1, 2009 },
- { "GL_OES_byte_coordinates", o(dummy_true), ES1, 2002 },
- { "GL_OES_compressed_ETC1_RGB8_texture", o(OES_compressed_ETC1_RGB8_texture), ES1 | ES2, 2005 },
- { "GL_OES_compressed_paletted_texture", o(dummy_true), ES1, 2003 },
- { "GL_OES_depth24", o(dummy_true), ES1 | ES2, 2005 },
- { "GL_OES_depth32", o(dummy_false), DISABLE, 2005 },
- { "GL_OES_depth_texture", o(ARB_depth_texture), ES2, 2006 },
- { "GL_OES_depth_texture_cube_map", o(OES_depth_texture_cube_map), ES2, 2012 },
- { "GL_OES_draw_elements_base_vertex", o(ARB_draw_elements_base_vertex), ES2, 2014 },
- { "GL_OES_draw_texture", o(OES_draw_texture), ES1, 2004 },
- { "GL_OES_EGL_sync", o(dummy_true), ES1 | ES2, 2010 },
- /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
- { "GL_OES_EGL_image", o(OES_EGL_image), GL | ES1 | ES2, 2006 },
- { "GL_OES_EGL_image_external", o(OES_EGL_image_external), ES1 | ES2, 2010 },
- { "GL_OES_element_index_uint", o(dummy_true), ES1 | ES2, 2005 },
- { "GL_OES_fbo_render_mipmap", o(dummy_true), ES1 | ES2, 2005 },
- { "GL_OES_fixed_point", o(dummy_true), ES1, 2002 },
- { "GL_OES_framebuffer_object", o(dummy_true), ES1, 2005 },
- { "GL_OES_get_program_binary", o(dummy_true), ES2, 2008 },
- { "GL_OES_mapbuffer", o(dummy_true), ES1 | ES2, 2005 },
- { "GL_OES_packed_depth_stencil", o(dummy_true), ES1 | ES2, 2007 },
- { "GL_OES_point_size_array", o(dummy_true), ES1, 2004 },
- { "GL_OES_point_sprite", o(ARB_point_sprite), ES1, 2004 },
- { "GL_OES_query_matrix", o(dummy_true), ES1, 2003 },
- { "GL_OES_read_format", o(dummy_true), GL | ES1, 2003 },
- { "GL_OES_rgb8_rgba8", o(dummy_true), ES1 | ES2, 2005 },
- { "GL_OES_single_precision", o(dummy_true), ES1, 2003 },
- { "GL_OES_standard_derivatives", o(OES_standard_derivatives), ES2, 2005 },
- { "GL_OES_stencil1", o(dummy_false), DISABLE, 2005 },
- { "GL_OES_stencil4", o(dummy_false), DISABLE, 2005 },
- { "GL_OES_stencil8", o(dummy_true), ES1 | ES2, 2005 },
- { "GL_OES_stencil_wrap", o(dummy_true), ES1, 2002 },
- { "GL_OES_surfaceless_context", o(dummy_true), ES1 | ES2, 2012 },
- { "GL_OES_texture_3D", o(EXT_texture3D), ES2, 2005 },
- { "GL_OES_texture_cube_map", o(ARB_texture_cube_map), ES1, 2007 },
- { "GL_OES_texture_env_crossbar", o(ARB_texture_env_crossbar), ES1, 2005 },
- { "GL_OES_texture_float", o(OES_texture_float), ES2, 2005 },
- { "GL_OES_texture_float_linear", o(OES_texture_float_linear), ES2, 2005 },
- { "GL_OES_texture_half_float", o(OES_texture_half_float), ES2, 2005 },
- { "GL_OES_texture_half_float_linear", o(OES_texture_half_float_linear), ES2, 2005 },
- { "GL_OES_texture_mirrored_repeat", o(dummy_true), ES1, 2005 },
- { "GL_OES_texture_storage_multisample_2d_array",o(ARB_texture_multisample), ES31, 2014 },
- { "GL_OES_texture_npot", o(ARB_texture_non_power_of_two), ES1 | ES2, 2005 },
- { "GL_OES_vertex_array_object", o(dummy_true), ES1 | ES2, 2010 },
-
- /* KHR extensions */
- { "GL_KHR_debug", o(dummy_true), GL, 2012 },
- { "GL_KHR_context_flush_control", o(dummy_true), GL | ES2, 2014 },
- { "GL_KHR_texture_compression_astc_hdr", o(KHR_texture_compression_astc_hdr), GL | ES2, 2012 },
- { "GL_KHR_texture_compression_astc_ldr", o(KHR_texture_compression_astc_ldr), GL | ES2, 2012 },
-
- /* Vendor extensions */
- { "GL_3DFX_texture_compression_FXT1", o(TDFX_texture_compression_FXT1), GL, 1999 },
- { "GL_AMD_conservative_depth", o(ARB_conservative_depth), GL, 2009 },
- { "GL_AMD_draw_buffers_blend", o(ARB_draw_buffers_blend), GL, 2009 },
- { "GL_AMD_performance_monitor", o(AMD_performance_monitor), GL, 2007 },
- { "GL_AMD_pinned_memory", o(AMD_pinned_memory), GL, 2013 },
- { "GL_AMD_seamless_cubemap_per_texture", o(AMD_seamless_cubemap_per_texture), GL, 2009 },
- { "GL_AMD_shader_stencil_export", o(ARB_shader_stencil_export), GL, 2009 },
- { "GL_AMD_shader_trinary_minmax", o(dummy_true), GL, 2012 },
- { "GL_AMD_vertex_shader_layer", o(AMD_vertex_shader_layer), GLC, 2012 },
- { "GL_AMD_vertex_shader_viewport_index", o(AMD_vertex_shader_viewport_index), GLC, 2012 },
- { "GL_APPLE_object_purgeable", o(APPLE_object_purgeable), GL, 2006 },
- { "GL_APPLE_packed_pixels", o(dummy_true), GLL, 2002 },
- { "GL_APPLE_texture_max_level", o(dummy_true), ES1 | ES2, 2009 },
- { "GL_APPLE_vertex_array_object", o(dummy_true), GLL, 2002 },
- { "GL_ATI_blend_equation_separate", o(EXT_blend_equation_separate), GL, 2003 },
- { "GL_ATI_draw_buffers", o(dummy_true), GLL, 2002 },
- { "GL_ATI_fragment_shader", o(ATI_fragment_shader), GLL, 2001 },
- { "GL_ATI_separate_stencil", o(ATI_separate_stencil), GLL, 2006 },
- { "GL_ATI_texture_compression_3dc", o(ATI_texture_compression_3dc), GLL, 2004 },
- { "GL_ATI_texture_env_combine3", o(ATI_texture_env_combine3), GLL, 2002 },
- { "GL_ATI_texture_float", o(ARB_texture_float), GL, 2002 },
- { "GL_ATI_texture_mirror_once", o(ATI_texture_mirror_once), GL, 2006 },
- { "GL_IBM_multimode_draw_arrays", o(dummy_true), GL, 1998 },
- { "GL_IBM_rasterpos_clip", o(dummy_true), GLL, 1996 },
- { "GL_IBM_texture_mirrored_repeat", o(dummy_true), GLL, 1998 },
- { "GL_INGR_blend_func_separate", o(EXT_blend_func_separate), GLL, 1999 },
- { "GL_INTEL_performance_query", o(INTEL_performance_query), GL | ES2, 2013 },
- { "GL_MESA_pack_invert", o(MESA_pack_invert), GL, 2002 },
- { "GL_MESA_texture_signed_rgba", o(EXT_texture_snorm), GL, 2009 },
- { "GL_MESA_window_pos", o(dummy_true), GLL, 2000 },
- { "GL_MESA_ycbcr_texture", o(MESA_ycbcr_texture), GL, 2002 },
- { "GL_NV_blend_square", o(dummy_true), GLL, 1999 },
- { "GL_NV_conditional_render", o(NV_conditional_render), GL, 2008 },
- { "GL_NV_depth_clamp", o(ARB_depth_clamp), GL, 2001 },
- { "GL_NV_draw_buffers", o(dummy_true), ES2, 2011 },
- { "GL_NV_fbo_color_attachments", o(dummy_true), ES2, 2010 },
- { "GL_NV_fog_distance", o(NV_fog_distance), GLL, 2001 },
- { "GL_NV_fragment_program_option", o(NV_fragment_program_option), GLL, 2005 },
- { "GL_NV_light_max_exponent", o(dummy_true), GLL, 1999 },
- { "GL_NV_packed_depth_stencil", o(dummy_true), GL, 2000 },
- { "GL_NV_point_sprite", o(NV_point_sprite), GL, 2001 },
- { "GL_NV_primitive_restart", o(NV_primitive_restart), GLL, 2002 },
- { "GL_NV_read_buffer", o(dummy_true), ES2, 2011 },
- { "GL_NV_read_depth", o(dummy_true), ES2, 2011 },
- { "GL_NV_read_depth_stencil", o(dummy_true), ES2, 2011 },
- { "GL_NV_read_stencil", o(dummy_true), ES2, 2011 },
- { "GL_NV_texgen_reflection", o(dummy_true), GLL, 1999 },
- { "GL_NV_texture_barrier", o(NV_texture_barrier), GL, 2009 },
- { "GL_NV_texture_env_combine4", o(NV_texture_env_combine4), GLL, 1999 },
- { "GL_NV_texture_rectangle", o(NV_texture_rectangle), GLL, 2000 },
- { "GL_NV_vdpau_interop", o(NV_vdpau_interop), GL, 2010 },
- { "GL_S3_s3tc", o(ANGLE_texture_compression_dxt), GL, 1999 },
- { "GL_SGIS_generate_mipmap", o(dummy_true), GLL, 1997 },
- { "GL_SGIS_texture_border_clamp", o(ARB_texture_border_clamp), GLL, 1997 },
- { "GL_SGIS_texture_edge_clamp", o(dummy_true), GLL, 1997 },
- { "GL_SGIS_texture_lod", o(dummy_true), GLL, 1997 },
- { "GL_SUN_multi_draw_arrays", o(dummy_true), GLL, 1999 },
-
- { 0, 0, 0, 0 },
+const struct mesa_extension _mesa_extension_table[] = { /* one entry per EXT() line of extensions_table.h */
+#define EXT(name_str, driver_cap, gll_ver, glc_ver, gles_ver, gles2_ver, yyyy) \
+ { .name = "GL_" #name_str, .offset = o(driver_cap), /* stringified name gets a "GL_" prefix */ \
+ .version = { /* minimum context version per API, indexed by the API_* enum */ \
+ [API_OPENGL_COMPAT] = gll_ver, \
+ [API_OPENGL_CORE] = glc_ver, \
+ [API_OPENGLES] = gles_ver, \
+ [API_OPENGLES2] = gles2_ver, \
+ }, \
+ .year = yyyy \
+ },
+#include "extensions_table.h" /* each line expands through EXT() above */
+#undef EXT
};
/**
* Given an extension name, lookup up the corresponding member of struct
* gl_extensions and return that member's offset (in bytes). If the name is
- * not found in the \c extension_table, return 0.
+ * not found in the \c _mesa_extension_table, return 0.
*
* \param name Name of extension.
* \return Offset of member in struct gl_extensions.
@@ -424,14 +80,14 @@ static const struct extension extension_table[] = {
static size_t
name_to_offset(const char* name)
{
- const struct extension *i;
+ unsigned i;
if (name == 0)
return 0;
- for (i = extension_table; i->name != 0; ++i) {
- if (strcmp(name, i->name) == 0)
- return i->offset;
+ for (i = 0; i < ARRAY_SIZE(_mesa_extension_table); ++i) {
+ if (strcmp(name, _mesa_extension_table[i].name) == 0)
+ return _mesa_extension_table[i].offset;
}
return 0;
@@ -444,15 +100,16 @@ name_to_offset(const char* name)
static void
override_extensions_in_context(struct gl_context *ctx)
{
- const struct extension *i;
+ unsigned i;
const GLboolean *enables =
(GLboolean*) &_mesa_extension_override_enables;
const GLboolean *disables =
(GLboolean*) &_mesa_extension_override_disables;
GLboolean *ctx_ext = (GLboolean*)&ctx->Extensions;
- for (i = extension_table; i->name != 0; ++i) {
- size_t offset = i->offset;
+ for (i = 0; i < ARRAY_SIZE(_mesa_extension_table); ++i) {
+ size_t offset = _mesa_extension_table[i].offset;
+
assert(!enables[offset] || !disables[offset]);
if (enables[offset]) {
ctx_ext[offset] = 1;
@@ -726,7 +383,6 @@ _mesa_init_extensions(struct gl_extensions *extensions)
/* Then, selectively turn default extensions on. */
extensions->dummy_true = GL_TRUE;
- extensions->EXT_texture3D = GL_TRUE;
}
@@ -734,18 +390,33 @@ typedef unsigned short extension_index;
/**
+ * Given an extension enum, return whether or not the extension is supported.
+ * Support depends on both of the following factors:
+ * there is driver support, and the context's OpenGL/ES version is at least
+ * the one specified for its API in the _mesa_extension_table.
+ */
+static inline bool
+_mesa_extension_supported(const struct gl_context *ctx, extension_index i)
+{
+ const bool *base = (bool *) &ctx->Extensions;
+ const struct mesa_extension *ext = _mesa_extension_table + i;
+
+ return (ctx->Version >= ext->version[ctx->API]) && base[ext->offset];
+}
+
+/**
* Compare two entries of the extensions table. Sorts first by year,
* then by name.
*
- * Arguments are indices into extension_table.
+ * Arguments are indices into _mesa_extension_table.
*/
static int
extension_compare(const void *p1, const void *p2)
{
extension_index i1 = * (const extension_index *) p1;
extension_index i2 = * (const extension_index *) p2;
- const struct extension *e1 = &extension_table[i1];
- const struct extension *e2 = &extension_table[i2];
+ const struct mesa_extension *e1 = &_mesa_extension_table[i1];
+ const struct mesa_extension *e2 = &_mesa_extension_table[i2];
int res;
res = (int)e1->year - (int)e2->year;
@@ -775,15 +446,9 @@ _mesa_make_extension_string(struct gl_context *ctx)
extension_index *extension_indices;
/* String of extra extensions. */
char *extra_extensions = get_extension_override(ctx);
- GLboolean *base = (GLboolean *) &ctx->Extensions;
- const struct extension *i;
+ unsigned k;
unsigned j;
unsigned maxYear = ~0;
- unsigned api_set = (1 << ctx->API);
- if (_mesa_is_gles3(ctx))
- api_set |= ES3;
- if (_mesa_is_gles31(ctx))
- api_set |= ES31;
/* Check if the MESA_EXTENSION_MAX_YEAR env var is set */
{
@@ -797,10 +462,11 @@ _mesa_make_extension_string(struct gl_context *ctx)
/* Compute length of the extension string. */
count = 0;
- for (i = extension_table; i->name != 0; ++i) {
- if (base[i->offset] &&
- i->year <= maxYear &&
- (i->api_set & api_set)) {
+ for (k = 0; k < ARRAY_SIZE(_mesa_extension_table); ++k) {
+ const struct mesa_extension *i = _mesa_extension_table + k;
+
+ if (i->year <= maxYear &&
+ _mesa_extension_supported(ctx, k)) {
length += strlen(i->name) + 1; /* +1 for space */
++count;
}
@@ -827,11 +493,10 @@ _mesa_make_extension_string(struct gl_context *ctx)
* expect will fit into that buffer.
*/
j = 0;
- for (i = extension_table; i->name != 0; ++i) {
- if (base[i->offset] &&
- i->year <= maxYear &&
- (i->api_set & api_set)) {
- extension_indices[j++] = i - extension_table;
+ for (k = 0; k < ARRAY_SIZE(_mesa_extension_table); ++k) {
+ if (_mesa_extension_table[k].year <= maxYear &&
+ _mesa_extension_supported(ctx, k)) {
+ extension_indices[j++] = k;
}
}
assert(j == count);
@@ -840,8 +505,8 @@ _mesa_make_extension_string(struct gl_context *ctx)
/* Build the extension string.*/
for (j = 0; j < count; ++j) {
- i = &extension_table[extension_indices[j]];
- assert(base[i->offset] && (i->api_set & api_set));
+ const struct mesa_extension *i = &_mesa_extension_table[extension_indices[j]];
+ assert(_mesa_extension_supported(ctx, extension_indices[j]));
strcat(exts, i->name);
strcat(exts, " ");
}
@@ -860,23 +525,15 @@ _mesa_make_extension_string(struct gl_context *ctx)
GLuint
_mesa_get_extension_count(struct gl_context *ctx)
{
- GLboolean *base;
- const struct extension *i;
- unsigned api_set = (1 << ctx->API);
- if (_mesa_is_gles3(ctx))
- api_set |= ES3;
- if (_mesa_is_gles31(ctx))
- api_set |= ES31;
+ unsigned k;
/* only count once */
if (ctx->Extensions.Count != 0)
return ctx->Extensions.Count;
- base = (GLboolean *) &ctx->Extensions;
- for (i = extension_table; i->name != 0; ++i) {
- if (base[i->offset] && (i->api_set & api_set)) {
+ for (k = 0; k < ARRAY_SIZE(_mesa_extension_table); ++k) {
+ if (_mesa_extension_supported(ctx, k))
ctx->Extensions.Count++;
- }
}
return ctx->Extensions.Count;
}
@@ -887,21 +544,13 @@ _mesa_get_extension_count(struct gl_context *ctx)
const GLubyte *
_mesa_get_enabled_extension(struct gl_context *ctx, GLuint index)
{
- const GLboolean *base;
- size_t n;
- const struct extension *i;
- unsigned api_set = (1 << ctx->API);
- if (_mesa_is_gles3(ctx))
- api_set |= ES3;
- if (_mesa_is_gles31(ctx))
- api_set |= ES31;
-
- base = (GLboolean*) &ctx->Extensions;
- n = 0;
- for (i = extension_table; i->name != 0; ++i) {
- if (base[i->offset] && (i->api_set & api_set)) {
+ size_t n = 0;
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(_mesa_extension_table); ++i) {
+ if (_mesa_extension_supported(ctx, i)) {
if (n == index)
- return (const GLubyte*) i->name;
+ return (const GLubyte*) _mesa_extension_table[i].name;
else
++n;
}
diff --git a/src/mesa/main/extensions.h b/src/mesa/main/extensions.h
index 595512a5d5c..1615e1cc0a4 100644
--- a/src/mesa/main/extensions.h
+++ b/src/mesa/main/extensions.h
@@ -55,6 +55,50 @@ _mesa_get_extension_count(struct gl_context *ctx);
extern const GLubyte *
_mesa_get_enabled_extension(struct gl_context *ctx, GLuint index);
+
+/**
+ * \brief An element of the \c _mesa_extension_table.
+ */
+struct mesa_extension {
+ /** Name of extension, such as "GL_ARB_depth_clamp". */
+ const char *name;
+
+ /** Offset (in bytes) of the corresponding member in struct gl_extensions. */
+ size_t offset;
+
+ /** Minimum version the extension requires for the given API
+ * (see gl_api defined in mtypes.h). The value is equal to:
+ * 10 * major_version + minor_version
+ */
+ uint8_t version[API_OPENGL_LAST + 1];
+
+ /** Year the extension was proposed or approved. Used to sort the
+ * extension string chronologically. */
+ uint16_t year;
+};
+
+extern const struct mesa_extension _mesa_extension_table[];
+
+
+/* Generate enums for the functions below */
+enum {
+#define EXT(name_str, ...) MESA_EXTENSION_##name_str,
+#include "extensions_table.h"
+#undef EXT
+};
+
+
+/** Checks if the context supports a user-facing extension */
+#define EXT(name_str, driver_cap, ...) \
+static inline bool \
+_mesa_has_##name_str(const struct gl_context *ctx) \
+{ \
+ return ctx->Extensions.driver_cap && (ctx->Extensions.Version >= \
+ _mesa_extension_table[MESA_EXTENSION_##name_str].version[ctx->API]); \
+}
+#include "extensions_table.h"
+#undef EXT
+
extern struct gl_extensions _mesa_extension_override_enables;
extern struct gl_extensions _mesa_extension_override_disables;
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
new file mode 100644
index 00000000000..d12fd9f1c8d
--- /dev/null
+++ b/src/mesa/main/extensions_table.h
@@ -0,0 +1,335 @@
+#define GLL 0
+#define GLC 0
+#define ES1 0
+#define ES2 0
+#define x ~0
+EXT(ARB_ES2_compatibility , ARB_ES2_compatibility , GLL, GLC, x , x , 2009)
+EXT(ARB_ES3_compatibility , ARB_ES3_compatibility , GLL, GLC, x , x , 2012)
+EXT(ARB_arrays_of_arrays , ARB_arrays_of_arrays , GLL, GLC, x , x , 2012)
+EXT(ARB_base_instance , ARB_base_instance , GLL, GLC, x , x , 2011)
+EXT(ARB_blend_func_extended , ARB_blend_func_extended , GLL, GLC, x , x , 2009)
+EXT(ARB_buffer_storage , ARB_buffer_storage , GLL, GLC, x , x , 2013)
+EXT(ARB_clear_buffer_object , dummy_true , GLL, GLC, x , x , 2012)
+EXT(ARB_clear_texture , ARB_clear_texture , GLL, GLC, x , x , 2013)
+EXT(ARB_clip_control , ARB_clip_control , GLL, GLC, x , x , 2014)
+EXT(ARB_color_buffer_float , ARB_color_buffer_float , GLL, GLC, x , x , 2004)
+EXT(ARB_compressed_texture_pixel_storage , dummy_true , GLL, GLC, x , x , 2011)
+EXT(ARB_compute_shader , ARB_compute_shader , GLL, GLC, x , x , 2012)
+EXT(ARB_conditional_render_inverted , ARB_conditional_render_inverted , GLL, GLC, x , x , 2014)
+EXT(ARB_copy_buffer , dummy_true , GLL, GLC, x , x , 2008)
+EXT(ARB_copy_image , ARB_copy_image , GLL, GLC, x , x , 2012)
+EXT(ARB_conservative_depth , ARB_conservative_depth , GLL, GLC, x , x , 2011)
+EXT(ARB_debug_output , dummy_true , GLL, GLC, x , x , 2009)
+EXT(ARB_depth_buffer_float , ARB_depth_buffer_float , GLL, GLC, x , x , 2008)
+EXT(ARB_depth_clamp , ARB_depth_clamp , GLL, GLC, x , x , 2003)
+EXT(ARB_depth_texture , ARB_depth_texture , GLL, x , x , x , 2001)
+EXT(ARB_derivative_control , ARB_derivative_control , GLL, GLC, x , x , 2014)
+EXT(ARB_direct_state_access , dummy_true , x , GLC, x , x , 2014)
+EXT(ARB_draw_buffers , dummy_true , GLL, GLC, x , x , 2002)
+EXT(ARB_draw_buffers_blend , ARB_draw_buffers_blend , GLL, GLC, x , x , 2009)
+EXT(ARB_draw_elements_base_vertex , ARB_draw_elements_base_vertex , GLL, GLC, x , x , 2009)
+EXT(ARB_draw_indirect , ARB_draw_indirect , x , GLC, x , x , 2010)
+EXT(ARB_draw_instanced , ARB_draw_instanced , GLL, GLC, x , x , 2008)
+EXT(ARB_enhanced_layouts , ARB_enhanced_layouts , x , GLC, x , x , 2013)
+EXT(ARB_explicit_attrib_location , ARB_explicit_attrib_location , GLL, GLC, x , x , 2009)
+EXT(ARB_explicit_uniform_location , ARB_explicit_uniform_location , GLL, GLC, x , x , 2012)
+EXT(ARB_fragment_coord_conventions , ARB_fragment_coord_conventions , GLL, GLC, x , x , 2009)
+EXT(ARB_fragment_layer_viewport , ARB_fragment_layer_viewport , x , GLC, x , x , 2012)
+EXT(ARB_fragment_program , ARB_fragment_program , GLL, x , x , x , 2002)
+EXT(ARB_fragment_program_shadow , ARB_fragment_program_shadow , GLL, x , x , x , 2003)
+EXT(ARB_fragment_shader , ARB_fragment_shader , GLL, GLC, x , x , 2002)
+EXT(ARB_framebuffer_no_attachments , ARB_framebuffer_no_attachments , GLL, GLC, x , x , 2012)
+EXT(ARB_framebuffer_object , ARB_framebuffer_object , GLL, GLC, x , x , 2005)
+EXT(ARB_framebuffer_sRGB , EXT_framebuffer_sRGB , GLL, GLC, x , x , 1998)
+EXT(ARB_get_program_binary , dummy_true , GLL, GLC, x , x , 2010)
+EXT(ARB_get_texture_sub_image , dummy_true , GLL, GLC, x , x , 2014)
+EXT(ARB_gpu_shader5 , ARB_gpu_shader5 , x , GLC, x , x , 2010)
+EXT(ARB_gpu_shader_fp64 , ARB_gpu_shader_fp64 , x , GLC, x , x , 2010)
+EXT(ARB_half_float_pixel , dummy_true , GLL, GLC, x , x , 2003)
+EXT(ARB_half_float_vertex , ARB_half_float_vertex , GLL, GLC, x , x , 2008)
+EXT(ARB_instanced_arrays , ARB_instanced_arrays , GLL, GLC, x , x , 2008)
+EXT(ARB_internalformat_query , ARB_internalformat_query , GLL, GLC, x , x , 2011)
+EXT(ARB_invalidate_subdata , dummy_true , GLL, GLC, x , x , 2012)
+EXT(ARB_map_buffer_alignment , dummy_true , GLL, GLC, x , x , 2011)
+EXT(ARB_map_buffer_range , ARB_map_buffer_range , GLL, GLC, x , x , 2008)
+EXT(ARB_multi_bind , dummy_true , GLL, GLC, x , x , 2013)
+EXT(ARB_multi_draw_indirect , ARB_draw_indirect , x , GLC, x , x , 2012)
+EXT(ARB_multisample , dummy_true , GLL, x , x , x , 1994)
+EXT(ARB_multitexture , dummy_true , GLL, x , x , x , 1998)
+EXT(ARB_occlusion_query2 , ARB_occlusion_query2 , GLL, GLC, x , x , 2003)
+EXT(ARB_occlusion_query , ARB_occlusion_query , GLL, x , x , x , 2001)
+EXT(ARB_pipeline_statistics_query , ARB_pipeline_statistics_query , GLL, GLC, x , x , 2014)
+EXT(ARB_pixel_buffer_object , EXT_pixel_buffer_object , GLL, GLC, x , x , 2004)
+EXT(ARB_point_parameters , EXT_point_parameters , GLL, x , x , x , 1997)
+EXT(ARB_point_sprite , ARB_point_sprite , GLL, GLC, x , x , 2003)
+EXT(ARB_program_interface_query , dummy_true , GLL, GLC, x , x , 2012)
+EXT(ARB_provoking_vertex , EXT_provoking_vertex , GLL, GLC, x , x , 2009)
+EXT(ARB_robustness , dummy_true , GLL, GLC, x , x , 2010)
+EXT(ARB_sample_shading , ARB_sample_shading , GLL, GLC, x , x , 2009)
+EXT(ARB_sampler_objects , dummy_true , GLL, GLC, x , x , 2009)
+EXT(ARB_seamless_cube_map , ARB_seamless_cube_map , GLL, GLC, x , x , 2009)
+EXT(ARB_seamless_cubemap_per_texture , AMD_seamless_cubemap_per_texture , GLL, GLC, x , x , 2013)
+EXT(ARB_separate_shader_objects , dummy_true , GLL, GLC, x , x , 2010)
+EXT(ARB_shader_atomic_counters , ARB_shader_atomic_counters , GLL, GLC, x , x , 2011)
+EXT(ARB_shader_bit_encoding , ARB_shader_bit_encoding , GLL, GLC, x , x , 2010)
+EXT(ARB_shader_clock , ARB_shader_clock , GLL, GLC, x , x , 2015)
+EXT(ARB_shader_image_load_store , ARB_shader_image_load_store , GLL, GLC, x , x , 2011)
+EXT(ARB_shader_image_size , ARB_shader_image_size , GLL, GLC, x , x , 2012)
+EXT(ARB_shader_objects , dummy_true , GLL, GLC, x , x , 2002)
+EXT(ARB_shader_precision , ARB_shader_precision , GLL, GLC, x , x , 2010)
+EXT(ARB_shader_stencil_export , ARB_shader_stencil_export , GLL, GLC, x , x , 2009)
+EXT(ARB_shader_storage_buffer_object , ARB_shader_storage_buffer_object , GLL, GLC, x , x , 2012)
+EXT(ARB_shader_subroutine , ARB_shader_subroutine , x , GLC, x , x , 2010)
+EXT(ARB_shader_texture_image_samples , ARB_shader_texture_image_samples , GLL, GLC, x , x , 2014)
+EXT(ARB_shader_texture_lod , ARB_shader_texture_lod , GLL, GLC, x , x , 2009)
+EXT(ARB_shading_language_100 , dummy_true , GLL, x , x , x , 2003)
+EXT(ARB_shading_language_packing , ARB_shading_language_packing , GLL, GLC, x , x , 2011)
+EXT(ARB_shading_language_420pack , ARB_shading_language_420pack , GLL, GLC, x , x , 2011)
+EXT(ARB_shadow , ARB_shadow , GLL, x , x , x , 2001)
+EXT(ARB_stencil_texturing , ARB_stencil_texturing , GLL, GLC, x , x , 2012)
+EXT(ARB_sync , ARB_sync , GLL, GLC, x , x , 2003)
+EXT(ARB_texture_barrier , NV_texture_barrier , GLL, GLC, x , x , 2014)
+EXT(ARB_tessellation_shader , ARB_tessellation_shader , x , GLC, x , x , 2009)
+EXT(ARB_texture_border_clamp , ARB_texture_border_clamp , GLL, x , x , x , 2000)
+EXT(ARB_texture_buffer_object , ARB_texture_buffer_object , x , GLC, x , x , 2008)
+EXT(ARB_texture_buffer_object_rgb32 , ARB_texture_buffer_object_rgb32 , x , GLC, x , x , 2009)
+EXT(ARB_texture_buffer_range , ARB_texture_buffer_range , x , GLC, x , x , 2012)
+EXT(ARB_texture_compression , dummy_true , GLL, x , x , x , 2000)
+EXT(ARB_texture_compression_bptc , ARB_texture_compression_bptc , GLL, GLC, x , x , 2010)
+EXT(ARB_texture_compression_rgtc , ARB_texture_compression_rgtc , GLL, GLC, x , x , 2004)
+EXT(ARB_texture_cube_map , ARB_texture_cube_map , GLL, x , x , x , 1999)
+EXT(ARB_texture_cube_map_array , ARB_texture_cube_map_array , GLL, GLC, x , x , 2009)
+EXT(ARB_texture_env_add , dummy_true , GLL, x , x , x , 1999)
+EXT(ARB_texture_env_combine , ARB_texture_env_combine , GLL, x , x , x , 2001)
+EXT(ARB_texture_env_crossbar , ARB_texture_env_crossbar , GLL, x , x , x , 2001)
+EXT(ARB_texture_env_dot3 , ARB_texture_env_dot3 , GLL, x , x , x , 2001)
+EXT(ARB_texture_float , ARB_texture_float , GLL, GLC, x , x , 2004)
+EXT(ARB_texture_gather , ARB_texture_gather , GLL, GLC, x , x , 2009)
+EXT(ARB_texture_mirrored_repeat , dummy_true , GLL, x , x , x , 2001)
+EXT(ARB_texture_mirror_clamp_to_edge , ARB_texture_mirror_clamp_to_edge , GLL, GLC, x , x , 2013)
+EXT(ARB_texture_multisample , ARB_texture_multisample , GLL, GLC, x , x , 2009)
+EXT(ARB_texture_non_power_of_two , ARB_texture_non_power_of_two , GLL, GLC, x , x , 2003)
+EXT(ARB_texture_query_levels , ARB_texture_query_levels , GLL, GLC, x , x , 2012)
+EXT(ARB_texture_query_lod , ARB_texture_query_lod , GLL, GLC, x , x , 2009)
+EXT(ARB_texture_rectangle , NV_texture_rectangle , GLL, GLC, x , x , 2004)
+EXT(ARB_texture_rgb10_a2ui , ARB_texture_rgb10_a2ui , GLL, GLC, x , x , 2009)
+EXT(ARB_texture_rg , ARB_texture_rg , GLL, GLC, x , x , 2008)
+EXT(ARB_texture_stencil8 , ARB_texture_stencil8 , GLL, GLC, x , x , 2013)
+EXT(ARB_texture_storage , dummy_true , GLL, GLC, x , x , 2011)
+EXT(ARB_texture_storage_multisample , ARB_texture_multisample , GLL, GLC, x , x , 2012)
+EXT(ARB_texture_view , ARB_texture_view , GLL, GLC, x , x , 2012)
+EXT(ARB_texture_swizzle , EXT_texture_swizzle , GLL, GLC, x , x , 2008)
+EXT(ARB_timer_query , ARB_timer_query , GLL, GLC, x , x , 2010)
+EXT(ARB_transform_feedback2 , ARB_transform_feedback2 , GLL, GLC, x , x , 2010)
+EXT(ARB_transform_feedback3 , ARB_transform_feedback3 , GLL, GLC, x , x , 2010)
+EXT(ARB_transform_feedback_instanced , ARB_transform_feedback_instanced , GLL, GLC, x , x , 2011)
+EXT(ARB_transpose_matrix , dummy_true , GLL, x , x , x , 1999)
+EXT(ARB_uniform_buffer_object , ARB_uniform_buffer_object , GLL, GLC, x , x , 2009)
+EXT(ARB_vertex_array_bgra , EXT_vertex_array_bgra , GLL, GLC, x , x , 2008)
+EXT(ARB_vertex_array_object , dummy_true , GLL, GLC, x , x , 2006)
+EXT(ARB_vertex_attrib_binding , dummy_true , GLL, GLC, x , x , 2012)
+EXT(ARB_vertex_buffer_object , dummy_true , GLL, x , x , x , 2003)
+EXT(ARB_vertex_program , ARB_vertex_program , GLL, x , x , x , 2002)
+EXT(ARB_vertex_shader , ARB_vertex_shader , GLL, GLC, x , x , 2002)
+EXT(ARB_vertex_attrib_64bit , ARB_vertex_attrib_64bit , x , GLC, x , x , 2010)
+EXT(ARB_vertex_type_10f_11f_11f_rev , ARB_vertex_type_10f_11f_11f_rev , GLL, GLC, x , x , 2013)
+EXT(ARB_vertex_type_2_10_10_10_rev , ARB_vertex_type_2_10_10_10_rev , GLL, GLC, x , x , 2009)
+EXT(ARB_viewport_array , ARB_viewport_array , x , GLC, x , x , 2010)
+EXT(ARB_window_pos , dummy_true , GLL, x , x , x , 2001)
+
+EXT(EXT_abgr , dummy_true , GLL, GLC, x , x , 1995)
+EXT(EXT_bgra , dummy_true , GLL, x , x , x , 1995)
+EXT(EXT_blend_color , EXT_blend_color , GLL, x , x , x , 1995)
+EXT(EXT_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003)
+EXT(EXT_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999)
+EXT(EXT_buffer_storage , ARB_buffer_storage , x , x , x , 31, 2015)
+EXT(EXT_discard_framebuffer , dummy_true , x , x , ES1, ES2, 2009)
+EXT(EXT_blend_minmax , EXT_blend_minmax , GLL, x , ES1, ES2, 1995)
+EXT(EXT_blend_subtract , dummy_true , GLL, x , x , x , 1995)
+EXT(EXT_compiled_vertex_array , dummy_true , GLL, x , x , x , 1996)
+EXT(EXT_copy_texture , dummy_true , GLL, x , x , x , 1995)
+EXT(EXT_depth_bounds_test , EXT_depth_bounds_test , GLL, GLC, x , x , 2002)
+EXT(EXT_draw_buffers , dummy_true , x , x , x , ES2, 2012)
+EXT(EXT_draw_buffers2 , EXT_draw_buffers2 , GLL, GLC, x , x , 2006)
+EXT(EXT_draw_elements_base_vertex , ARB_draw_elements_base_vertex , x , x , x , ES2, 2014)
+EXT(EXT_draw_instanced , ARB_draw_instanced , GLL, GLC, x , x , 2006)
+EXT(EXT_draw_range_elements , dummy_true , GLL, x , x , x , 1997)
+EXT(EXT_fog_coord , dummy_true , GLL, x , x , x , 1999)
+EXT(EXT_framebuffer_blit , dummy_true , GLL, GLC, x , x , 2005)
+EXT(EXT_framebuffer_multisample , EXT_framebuffer_multisample , GLL, GLC, x , x , 2005)
+EXT(EXT_framebuffer_multisample_blit_scaled , EXT_framebuffer_multisample_blit_scaled, GLL, GLC, x , x , 2011)
+EXT(EXT_framebuffer_object , dummy_true , GLL, x , x , x , 2000)
+EXT(EXT_framebuffer_sRGB , EXT_framebuffer_sRGB , GLL, GLC, x , x , 1998)
+EXT(EXT_gpu_program_parameters , EXT_gpu_program_parameters , GLL, x , x , x , 2006)
+EXT(EXT_gpu_shader4 , EXT_gpu_shader4 , GLL, GLC, x , x , 2006)
+EXT(EXT_map_buffer_range , ARB_map_buffer_range , x , x , ES1, ES2, 2012)
+EXT(EXT_multi_draw_arrays , dummy_true , GLL, x , ES1, ES2, 1999)
+EXT(EXT_packed_depth_stencil , dummy_true , GLL, GLC, x , x , 2005)
+EXT(EXT_packed_float , EXT_packed_float , GLL, GLC, x , x , 2004)
+EXT(EXT_packed_pixels , dummy_true , GLL, x , x , x , 1997)
+EXT(EXT_pixel_buffer_object , EXT_pixel_buffer_object , GLL, GLC, x , x , 2004)
+EXT(EXT_point_parameters , EXT_point_parameters , GLL, x , x , x , 1997)
+EXT(EXT_polygon_offset , dummy_true , GLL, x , x , x , 1995)
+EXT(EXT_polygon_offset_clamp , EXT_polygon_offset_clamp , GLL, GLC, x , x , 2014)
+EXT(EXT_provoking_vertex , EXT_provoking_vertex , GLL, GLC, x , x , 2009)
+EXT(EXT_rescale_normal , dummy_true , GLL, x , x , x , 1997)
+EXT(EXT_secondary_color , dummy_true , GLL, x , x , x , 1999)
+EXT(EXT_separate_shader_objects , dummy_true , x , x , x , ES2, 2013)
+EXT(EXT_separate_specular_color , dummy_true , GLL, x , x , x , 1997)
+EXT(EXT_shader_integer_mix , EXT_shader_integer_mix , GLL, GLC, ES1, 30, 2013)
+EXT(EXT_shadow_funcs , ARB_shadow , GLL, x , x , x , 2002)
+EXT(EXT_stencil_two_side , EXT_stencil_two_side , GLL, x , x , x , 2001)
+EXT(EXT_stencil_wrap , dummy_true , GLL, x , x , x , 2002)
+EXT(EXT_subtexture , dummy_true , GLL, x , x , x , 1995)
+EXT(EXT_texture3D , dummy_true , GLL, x , x , x , 1996)
+EXT(EXT_texture_array , EXT_texture_array , GLL, GLC, x , x , 2006)
+EXT(EXT_texture_compression_dxt1 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2004)
+EXT(ANGLE_texture_compression_dxt3 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2011)
+EXT(ANGLE_texture_compression_dxt5 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2011)
+EXT(EXT_texture_compression_latc , EXT_texture_compression_latc , GLL, x , x , x , 2006)
+EXT(EXT_texture_compression_rgtc , ARB_texture_compression_rgtc , GLL, GLC, x , x , 2004)
+EXT(EXT_texture_compression_s3tc , EXT_texture_compression_s3tc , GLL, GLC, x , x , 2000)
+EXT(EXT_texture_cube_map , ARB_texture_cube_map , GLL, x , x , x , 2001)
+EXT(EXT_texture_edge_clamp , dummy_true , GLL, x , x , x , 1997)
+EXT(EXT_texture_env_add , dummy_true , GLL, x , x , x , 1999)
+EXT(EXT_texture_env_combine , dummy_true , GLL, x , x , x , 2000)
+EXT(EXT_texture_env_dot3 , EXT_texture_env_dot3 , GLL, x , x , x , 2000)
+EXT(EXT_texture_filter_anisotropic , EXT_texture_filter_anisotropic , GLL, GLC, ES1, ES2, 1999)
+EXT(EXT_texture_format_BGRA8888 , dummy_true , x , x , ES1, ES2, 2005)
+EXT(EXT_texture_rg , ARB_texture_rg , x , x , x , ES2, 2011)
+EXT(EXT_read_format_bgra , dummy_true , x , x , ES1, ES2, 2009)
+EXT(EXT_texture_integer , EXT_texture_integer , GLL, GLC, x , x , 2006)
+EXT(EXT_texture_lod_bias , dummy_true , GLL, x , ES1, x , 1999)
+EXT(EXT_texture_mirror_clamp , EXT_texture_mirror_clamp , GLL, GLC, x , x , 2004)
+EXT(EXT_texture_object , dummy_true , GLL, x , x , x , 1995)
+EXT(EXT_texture , dummy_true , GLL, x , x , x , 1996)
+EXT(EXT_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2004)
+EXT(EXT_texture_shared_exponent , EXT_texture_shared_exponent , GLL, GLC, x , x , 2004)
+EXT(EXT_texture_snorm , EXT_texture_snorm , GLL, GLC, x , x , 2009)
+EXT(EXT_texture_sRGB , EXT_texture_sRGB , GLL, GLC, x , x , 2004)
+EXT(EXT_texture_sRGB_decode , EXT_texture_sRGB_decode , GLL, GLC, x , x , 2006)
+EXT(EXT_texture_swizzle , EXT_texture_swizzle , GLL, GLC, x , x , 2008)
+EXT(EXT_texture_type_2_10_10_10_REV , dummy_true , x , x , x , ES2, 2008)
+EXT(EXT_timer_query , EXT_timer_query , GLL, GLC, x , x , 2006)
+EXT(EXT_transform_feedback , EXT_transform_feedback , GLL, GLC, x , x , 2011)
+EXT(EXT_unpack_subimage , dummy_true , x , x , x , ES2, 2011)
+EXT(EXT_vertex_array_bgra , EXT_vertex_array_bgra , GLL, GLC, x , x , 2008)
+EXT(EXT_vertex_array , dummy_true , GLL, x , x , x , 1995)
+EXT(EXT_color_buffer_float , dummy_true , x , x , ES1, 30, 2013)
+
+
+EXT(OES_blend_equation_separate , EXT_blend_equation_separate , x , x , ES1, x , 2009)
+EXT(OES_blend_func_separate , EXT_blend_func_separate , x , x , ES1, x , 2009)
+EXT(OES_blend_subtract , dummy_true , x , x , ES1, x , 2009)
+EXT(OES_byte_coordinates , dummy_true , x , x , ES1, x , 2002)
+EXT(OES_compressed_ETC1_RGB8_texture , OES_compressed_ETC1_RGB8_texture , x , x , ES1, ES2, 2005)
+EXT(OES_compressed_paletted_texture , dummy_true , x , x , ES1, x , 2003)
+EXT(OES_depth24 , dummy_true , x , x , ES1, ES2, 2005)
+EXT(OES_depth32 , dummy_false , x , x , x , x , 2005)
+EXT(OES_depth_texture , ARB_depth_texture , x , x , x , ES2, 2006)
+EXT(OES_depth_texture_cube_map , OES_depth_texture_cube_map , x , x , x , ES2, 2012)
+EXT(OES_draw_elements_base_vertex , ARB_draw_elements_base_vertex , x , x , x , ES2, 2014)
+EXT(OES_draw_texture , OES_draw_texture , x , x , ES1, x , 2004)
+EXT(OES_EGL_sync , dummy_true , x , x , ES1, ES2, 2010)
+EXT(OES_EGL_image , OES_EGL_image , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
+EXT(OES_EGL_image_external , OES_EGL_image_external , x , x , ES1, ES2, 2010)
+EXT(OES_element_index_uint , dummy_true , x , x , ES1, ES2, 2005)
+EXT(OES_fbo_render_mipmap , dummy_true , x , x , ES1, ES2, 2005)
+EXT(OES_fixed_point , dummy_true , x , x , ES1, x , 2002)
+EXT(OES_framebuffer_object , dummy_true , x , x , ES1, x , 2005)
+EXT(OES_get_program_binary , dummy_true , x , x , x , ES2, 2008)
+EXT(OES_mapbuffer , dummy_true , x , x , ES1, ES2, 2005)
+EXT(OES_packed_depth_stencil , dummy_true , x , x , ES1, ES2, 2007)
+EXT(OES_point_size_array , dummy_true , x , x , ES1, x , 2004)
+EXT(OES_point_sprite , ARB_point_sprite , x , x , ES1, x , 2004)
+EXT(OES_query_matrix , dummy_true , x , x , ES1, x , 2003)
+EXT(OES_read_format , dummy_true , GLL, GLC, ES1, x , 2003)
+EXT(OES_rgb8_rgba8 , dummy_true , x , x , ES1, ES2, 2005)
+EXT(OES_single_precision , dummy_true , x , x , ES1, x , 2003)
+EXT(OES_standard_derivatives , OES_standard_derivatives , x , x , x , ES2, 2005)
+EXT(OES_stencil1 , dummy_false , x , x , x , x , 2005)
+EXT(OES_stencil4 , dummy_false , x , x , x , x , 2005)
+EXT(OES_stencil8 , dummy_true , x , x , ES1, ES2, 2005)
+EXT(OES_stencil_wrap , dummy_true , x , x , ES1, x , 2002)
+EXT(OES_surfaceless_context , dummy_true , x , x , ES1, ES2, 2012)
+EXT(OES_texture_3D , dummy_true , x , x , x , ES2, 2005)
+EXT(OES_texture_cube_map , ARB_texture_cube_map , x , x , ES1, x , 2007)
+EXT(OES_texture_env_crossbar , ARB_texture_env_crossbar , x , x , ES1, x , 2005)
+EXT(OES_texture_float , OES_texture_float , x , x , x , ES2, 2005)
+EXT(OES_texture_float_linear , OES_texture_float_linear , x , x , x , ES2, 2005)
+EXT(OES_texture_half_float , OES_texture_half_float , x , x , x , ES2, 2005)
+EXT(OES_texture_half_float_linear , OES_texture_half_float_linear , x , x , x , ES2, 2005)
+EXT(OES_texture_mirrored_repeat , dummy_true , x , x , ES1, x , 2005)
+EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample , x , x , ES1, 31, 2014)
+EXT(OES_texture_npot , ARB_texture_non_power_of_two , x , x , ES1, ES2, 2005)
+EXT(OES_vertex_array_object , dummy_true , x , x , ES1, ES2, 2010)
+
+
+EXT(KHR_debug , dummy_true , GLL, GLC, x , x , 2012)
+EXT(KHR_context_flush_control , dummy_true , GLL, GLC, x , ES2, 2014)
+EXT(KHR_texture_compression_astc_hdr , KHR_texture_compression_astc_hdr , GLL, GLC, x , ES2, 2012)
+EXT(KHR_texture_compression_astc_ldr , KHR_texture_compression_astc_ldr , GLL, GLC, x , ES2, 2012)
+
+
+EXT(3DFX_texture_compression_FXT1 , TDFX_texture_compression_FXT1 , GLL, GLC, x , x , 1999)
+EXT(AMD_conservative_depth , ARB_conservative_depth , GLL, GLC, x , x , 2009)
+EXT(AMD_draw_buffers_blend , ARB_draw_buffers_blend , GLL, GLC, x , x , 2009)
+EXT(AMD_performance_monitor , AMD_performance_monitor , GLL, GLC, x , x , 2007)
+EXT(AMD_pinned_memory , AMD_pinned_memory , GLL, GLC, x , x , 2013)
+EXT(AMD_seamless_cubemap_per_texture , AMD_seamless_cubemap_per_texture , GLL, GLC, x , x , 2009)
+EXT(AMD_shader_stencil_export , ARB_shader_stencil_export , GLL, GLC, x , x , 2009)
+EXT(AMD_shader_trinary_minmax , dummy_true , GLL, GLC, x , x , 2012)
+EXT(AMD_vertex_shader_layer , AMD_vertex_shader_layer , x , GLC, x , x , 2012)
+EXT(AMD_vertex_shader_viewport_index , AMD_vertex_shader_viewport_index , x , GLC, x , x , 2012)
+EXT(APPLE_object_purgeable , APPLE_object_purgeable , GLL, GLC, x , x , 2006)
+EXT(APPLE_packed_pixels , dummy_true , GLL, x , x , x , 2002)
+EXT(APPLE_texture_max_level , dummy_true , x , x , ES1, ES2, 2009)
+EXT(APPLE_vertex_array_object , dummy_true , GLL, x , x , x , 2002)
+EXT(ATI_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003)
+EXT(ATI_draw_buffers , dummy_true , GLL, x , x , x , 2002)
+EXT(ATI_fragment_shader , ATI_fragment_shader , GLL, x , x , x , 2001)
+EXT(ATI_separate_stencil , ATI_separate_stencil , GLL, x , x , x , 2006)
+EXT(ATI_texture_compression_3dc , ATI_texture_compression_3dc , GLL, x , x , x , 2004)
+EXT(ATI_texture_env_combine3 , ATI_texture_env_combine3 , GLL, x , x , x , 2002)
+EXT(ATI_texture_float , ARB_texture_float , GLL, GLC, x , x , 2002)
+EXT(ATI_texture_mirror_once , ATI_texture_mirror_once , GLL, GLC, x , x , 2006)
+EXT(IBM_multimode_draw_arrays , dummy_true , GLL, GLC, x , x , 1998)
+EXT(IBM_rasterpos_clip , dummy_true , GLL, x , x , x , 1996)
+EXT(IBM_texture_mirrored_repeat , dummy_true , GLL, x , x , x , 1998)
+EXT(INGR_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999)
+EXT(INTEL_performance_query , INTEL_performance_query , GLL, GLC, x , ES2, 2013)
+EXT(MESA_pack_invert , MESA_pack_invert , GLL, GLC, x , x , 2002)
+EXT(MESA_texture_signed_rgba , EXT_texture_snorm , GLL, GLC, x , x , 2009)
+EXT(MESA_window_pos , dummy_true , GLL, x , x , x , 2000)
+EXT(MESA_ycbcr_texture , MESA_ycbcr_texture , GLL, GLC, x , x , 2002)
+EXT(NV_blend_square , dummy_true , GLL, x , x , x , 1999)
+EXT(NV_conditional_render , NV_conditional_render , GLL, GLC, x , x , 2008)
+EXT(NV_depth_clamp , ARB_depth_clamp , GLL, GLC, x , x , 2001)
+EXT(NV_draw_buffers , dummy_true , x , x , x , ES2, 2011)
+EXT(NV_fbo_color_attachments , dummy_true , x , x , x , ES2, 2010)
+EXT(NV_fog_distance , NV_fog_distance , GLL, x , x , x , 2001)
+EXT(NV_fragment_program_option , NV_fragment_program_option , GLL, x , x , x , 2005)
+EXT(NV_light_max_exponent , dummy_true , GLL, x , x , x , 1999)
+EXT(NV_packed_depth_stencil , dummy_true , GLL, GLC, x , x , 2000)
+EXT(NV_point_sprite , NV_point_sprite , GLL, GLC, x , x , 2001)
+EXT(NV_primitive_restart , NV_primitive_restart , GLL, x , x , x , 2002)
+EXT(NV_read_buffer , dummy_true , x , x , x , ES2, 2011)
+EXT(NV_read_depth , dummy_true , x , x , x , ES2, 2011)
+EXT(NV_read_depth_stencil , dummy_true , x , x , x , ES2, 2011)
+EXT(NV_read_stencil , dummy_true , x , x , x , ES2, 2011)
+EXT(NV_texgen_reflection , dummy_true , GLL, x , x , x , 1999)
+EXT(NV_texture_barrier , NV_texture_barrier , GLL, GLC, x , x , 2009)
+EXT(NV_texture_env_combine4 , NV_texture_env_combine4 , GLL, x , x , x , 1999)
+EXT(NV_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2000)
+EXT(NV_vdpau_interop , NV_vdpau_interop , GLL, GLC, x , x , 2010)
+EXT(S3_s3tc , ANGLE_texture_compression_dxt , GLL, GLC, x , x , 1999)
+EXT(SGIS_generate_mipmap , dummy_true , GLL, x , x , x , 1997)
+EXT(SGIS_texture_border_clamp , ARB_texture_border_clamp , GLL, x , x , x , 1997)
+EXT(SGIS_texture_edge_clamp , dummy_true , GLL, x , x , x , 1997)
+EXT(SGIS_texture_lod , dummy_true , GLL, x , x , x , 1997)
+EXT(SUN_multi_draw_arrays , dummy_true , GLL, x , x , x , 1999)
+#undef GLL
+#undef GLC
+#undef ES1
+#undef ES2
+#undef x
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 02dd257d79d..95cbba4ed57 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2721,13 +2721,14 @@ struct gl_shader_program
struct gl_uniform_block **ShaderStorageBlocks;
/**
- * Indices into the _LinkedShaders's UniformBlocks[] array for each stage
- * they're used in, or -1.
+ * Indices into the BufferInterfaceBlocks[] array for each stage they're
+ * used in, or -1.
*
- * This is used to maintain the Binding values of the stage's UniformBlocks[]
- * and to answer the GL_UNIFORM_BLOCK_REFERENCED_BY_*_SHADER queries.
+ * This is used to maintain the Binding values of the stage's
+ * BufferInterfaceBlocks[] and to answer the
+ * GL_UNIFORM_BLOCK_REFERENCED_BY_*_SHADER queries.
*/
- int *UniformBlockStageIndex[MESA_SHADER_STAGES];
+ int *InterfaceBlockStageIndex[MESA_SHADER_STAGES];
/**
* Map of active uniform names to locations
@@ -2879,6 +2880,8 @@ struct gl_shader_compiler_options
*/
GLboolean OptimizeForAOS;
+ GLboolean LowerBufferInterfaceBlocks; /**< Lower UBO and SSBO access to intrinsics. */
+
const struct nir_shader_compiler_options *NirOptions;
};
@@ -3582,11 +3585,24 @@ struct gl_constants
* below:
* SampleMap8x = {a, b, c, d, e, f, g, h};
*
- * Follow the logic for other sample counts.
+ * Follow the logic for sample counts 2-8.
+ *
+ * For 16x the sample indices are laid out in a 4x4 grid as follows:
+ *
+ * -----------------
+ * | 0 | 1 | 2 | 3 |
+ * -----------------
+ * | 4 | 5 | 6 | 7 |
+ * -----------------
+ * | 8 | 9 |10 |11 |
+ * -----------------
+ * |12 |13 |14 |15 |
+ * -----------------
*/
uint8_t SampleMap2x[2];
uint8_t SampleMap4x[4];
uint8_t SampleMap8x[8];
+ uint8_t SampleMap16x[16];
/** GL_ARB_shader_atomic_counters */
GLuint MaxAtomicBufferBindings;
@@ -3667,6 +3683,7 @@ struct gl_extensions
GLboolean ARB_fragment_shader;
GLboolean ARB_framebuffer_no_attachments;
GLboolean ARB_framebuffer_object;
+ GLboolean ARB_enhanced_layouts;
GLboolean ARB_explicit_attrib_location;
GLboolean ARB_explicit_uniform_location;
GLboolean ARB_geometry_shader4;
@@ -3750,7 +3767,6 @@ struct gl_extensions
GLboolean EXT_provoking_vertex;
GLboolean EXT_shader_integer_mix;
GLboolean EXT_stencil_two_side;
- GLboolean EXT_texture3D;
GLboolean EXT_texture_array;
GLboolean EXT_texture_compression_latc;
GLboolean EXT_texture_compression_s3tc;
@@ -3808,6 +3824,12 @@ struct gl_extensions
const GLubyte *String;
/** Number of supported extensions */
GLuint Count;
+ /**
+ * The context version which extension helper functions compare against.
+ * By default, the value is equal to ctx->Version. This changes to ~0
+ * while meta is in progress.
+ */
+ GLubyte Version;
};
diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c
index 699a2ae47eb..90dff13485b 100644
--- a/src/mesa/main/pipelineobj.c
+++ b/src/mesa/main/pipelineobj.c
@@ -907,6 +907,21 @@ _mesa_ValidateProgramPipeline(GLuint pipeline)
_mesa_validate_program_pipeline(ctx, pipe,
(ctx->_Shader->Name == pipe->Name));
+
+ /* Validate inputs against outputs, this cannot be done during linking
+ * since programs have been linked separately from each other.
+ *
+ * From OpenGL 4.5 Core spec:
+ * "Separable program objects may have validation failures that cannot be
+ * detected without the complete program pipeline. Mismatched interfaces,
+ * improper usage of program objects together, and the same
+ * state-dependent failures can result in validation errors for such
+ * program objects."
+ *
+ * OpenGL ES 3.1 specification has the same text.
+ */
+ if (!_mesa_validate_pipeline_io(pipe))
+ pipe->Validated = GL_FALSE;
}
void GLAPIENTRY
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index dd51bba3386..58ba04153e6 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -980,7 +980,7 @@ is_resource_referenced(struct gl_shader_program *shProg,
return RESOURCE_ATC(res)->StageReferences[stage];
if (res->Type == GL_UNIFORM_BLOCK || res->Type == GL_SHADER_STORAGE_BLOCK)
- return shProg->UniformBlockStageIndex[stage][index] != -1;
+ return shProg->InterfaceBlockStageIndex[stage][index] != -1;
return res->StageReferences & (1 << stage);
}
@@ -1359,3 +1359,65 @@ _mesa_get_program_resourceiv(struct gl_shader_program *shProg,
if (length)
*length = amount;
}
+
+/**
+ * Check the interface between two consecutive shader stages of a pipeline.
+ *
+ * Mind the parameter naming: \p input_stage is the stage whose *outputs* are
+ * walked (the caller passes the earlier stage here) and \p output_stage is
+ * the stage whose *inputs* are matched against them by name.
+ *
+ * \return false if an output and an input with the same name carry different
+ *         precision qualifiers, true otherwise.
+ */
+static bool
+validate_io(const struct gl_shader *input_stage,
+            const struct gl_shader *output_stage)
+{
+   assert(input_stage && output_stage);
+
+   /* Local aliases named after the role each stage plays in this check. */
+   const struct gl_shader *producer = input_stage;
+   const struct gl_shader *consumer = output_stage;
+
+   /* For every output of the producer, look at every same-named input of
+    * the consumer and run the required checks on the pair.
+    */
+   foreach_in_list(ir_instruction, producer_node, producer->ir) {
+      ir_variable *output = producer_node->as_variable();
+      if (!output || output->data.mode != ir_var_shader_out)
+         continue;
+
+      foreach_in_list(ir_instruction, consumer_node, consumer->ir) {
+         ir_variable *input = consumer_node->as_variable();
+         if (!input || input->data.mode != ir_var_shader_in)
+            continue;
+
+         /* Only variables that share a name form an interface pair. */
+         if (strcmp(input->name, output->name) != 0)
+            continue;
+
+         /* From OpenGL ES 3.1 spec:
+          *     "When both shaders are in separate programs, mismatched
+          *     precision qualifiers will result in a program interface
+          *     mismatch that will result in program pipeline validation
+          *     failures, as described in section 7.4.1 (“Shader Interface
+          *     Matching”) of the OpenGL ES 3.1 Specification."
+          */
+         if (input->data.precision != output->data.precision)
+            return false;
+      }
+   }
+
+   return true;
+}
+
+/**
+ * Validate inputs against outputs in a program pipeline.
+ *
+ * Walks pipeline->CurrentProgram[] in index order and runs validate_io() on
+ * each pair of neighbouring non-NULL entries, i.e. every non-NULL entry is
+ * compared with the closest non-NULL entry before it.
+ *
+ * \return false as soon as one pair fails validate_io(), true otherwise
+ *         (including when fewer than two entries are non-NULL).
+ */
+extern "C" bool
+_mesa_validate_pipeline_io(struct gl_pipeline_object *pipeline)
+{
+   struct gl_shader_program **programs =
+      (struct gl_shader_program **) pipeline->CurrentProgram;
+   const unsigned num_stages = ARRAY_SIZE(pipeline->CurrentProgram);
+
+   /* Skip ahead to the first stage slot that holds a program. */
+   unsigned producer = 0;
+   while (producer < num_stages && !programs[producer])
+      producer++;
+
+   /* Pair each later non-NULL slot with the previous non-NULL one. */
+   for (unsigned consumer = producer + 1; consumer < num_stages; consumer++) {
+      if (!programs[consumer])
+         continue;
+
+      if (!validate_io(programs[producer]->_LinkedShaders[producer],
+                       programs[consumer]->_LinkedShaders[consumer]))
+         return false;
+
+      /* The stage just checked feeds the next non-NULL slot. */
+      producer = consumer;
+   }
+
+   return true;
+}
diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index ffc71931fec..203ccef7fc4 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -294,8 +294,8 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
shProg->BufferInterfaceBlocks = NULL;
shProg->NumBufferInterfaceBlocks = 0;
for (i = 0; i < MESA_SHADER_STAGES; i++) {
- ralloc_free(shProg->UniformBlockStageIndex[i]);
- shProg->UniformBlockStageIndex[i] = NULL;
+ ralloc_free(shProg->InterfaceBlockStageIndex[i]);
+ shProg->InterfaceBlockStageIndex[i] = NULL;
}
ralloc_free(shProg->AtomicBuffers);
diff --git a/src/mesa/main/shaderobj.h b/src/mesa/main/shaderobj.h
index 796de470735..be80752d7f2 100644
--- a/src/mesa/main/shaderobj.h
+++ b/src/mesa/main/shaderobj.h
@@ -234,6 +234,9 @@ _mesa_shader_stage_to_subroutine_uniform(gl_shader_stage stage)
}
}
+/**
+ * Validate inputs against outputs in a program pipeline (defined in
+ * shader_query.cpp).  Returns false when the check between two stages of
+ * the pipeline fails, true otherwise.
+ */
+extern bool
+_mesa_validate_pipeline_io(struct gl_pipeline_object *);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp
index ac2d2332df8..abe0f432572 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -2506,5 +2506,8 @@ const struct function gles31_functions_possible[] = {
/* GL_OES_texture_storage_multisample_2d_array */
{ "glTexStorage3DMultisampleOES", 31, -1 },
+ /* GL_EXT_buffer_storage */
+ { "glBufferStorageEXT", 31, -1 },
+
{ NULL, 0, -1 },
};
diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c
index cb147fac476..9d88554d945 100644
--- a/src/mesa/main/texstate.c
+++ b/src/mesa/main/texstate.c
@@ -330,7 +330,8 @@ _mesa_ClientActiveTexture(GLenum texture)
return;
if (texUnit >= ctx->Const.MaxTextureCoordUnits) {
- _mesa_error(ctx, GL_INVALID_ENUM, "glClientActiveTexture(texture)");
+ _mesa_error(ctx, GL_INVALID_ENUM, "glClientActiveTexture(texture=%s)",
+ _mesa_enum_to_string(texture));
return;
}
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index bc235380d97..758ca2456df 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -1026,7 +1026,7 @@ _mesa_UniformBlockBinding(GLuint program,
shProg->BufferInterfaceBlocks[uniformBlockIndex].Binding = uniformBlockBinding;
for (i = 0; i < MESA_SHADER_STAGES; i++) {
- int stage_index = shProg->UniformBlockStageIndex[i][uniformBlockIndex];
+ int stage_index = shProg->InterfaceBlockStageIndex[i][uniformBlockIndex];
if (stage_index != -1) {
struct gl_shader *sh = shProg->_LinkedShaders[i];
@@ -1079,7 +1079,7 @@ _mesa_ShaderStorageBlockBinding(GLuint program,
shProg->BufferInterfaceBlocks[shaderStorageBlockIndex].Binding = shaderStorageBlockBinding;
for (i = 0; i < MESA_SHADER_STAGES; i++) {
- int stage_index = shProg->UniformBlockStageIndex[i][shaderStorageBlockIndex];
+ int stage_index = shProg->InterfaceBlockStageIndex[i][shaderStorageBlockIndex];
if (stage_index != -1) {
struct gl_shader *sh = shProg->_LinkedShaders[i];
diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
index 5635a643200..314b26dc74f 100644
--- a/src/mesa/main/version.c
+++ b/src/mesa/main/version.c
@@ -524,6 +524,7 @@ _mesa_compute_version(struct gl_context *ctx)
return;
ctx->Version = _mesa_get_version(&ctx->Extensions, &ctx->Const, ctx->API);
+ ctx->Extensions.Version = ctx->Version;
/* Make sure that the GLSL version lines up with the GL version. In some
* cases it can be too high, e.g. if an extension is missing.
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 8afd336779f..5d20b26d26e 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -83,6 +83,7 @@ st_bufferobj_free(struct gl_context *ctx, struct gl_buffer_object *obj)
if (st_obj->buffer)
pipe_resource_reference(&st_obj->buffer, NULL);
+ mtx_destroy(&st_obj->Base.Mutex);
free(st_obj->Base.Label);
free(st_obj);
}
diff --git a/src/mesa/state_tracker/st_cb_copyimage.c b/src/mesa/state_tracker/st_cb_copyimage.c
index 75114cdb712..03a7294e7c9 100644
--- a/src/mesa/state_tracker/st_cb_copyimage.c
+++ b/src/mesa/state_tracker/st_cb_copyimage.c
@@ -552,6 +552,10 @@ st_CopyImageSubData(struct gl_context *ctx,
src_res = src->pt;
src_level = src_image->Level;
src_z += src_image->Face;
+ if (src_image->TexObject->Immutable) {
+ src_level += src_image->TexObject->MinLevel;
+ src_z += src_image->TexObject->MinLayer;
+ }
} else {
struct st_renderbuffer *src = st_renderbuffer(src_renderbuffer);
src_res = src->texture;
@@ -563,6 +567,10 @@ st_CopyImageSubData(struct gl_context *ctx,
dst_res = dst->pt;
dst_level = dst_image->Level;
dst_z += dst_image->Face;
+ if (dst_image->TexObject->Immutable) {
+ dst_level += dst_image->TexObject->MinLevel;
+ dst_z += dst_image->TexObject->MinLayer;
+ }
} else {
struct st_renderbuffer *dst = st_renderbuffer(dst_renderbuffer);
dst_res = dst->texture;
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index d4c916e8057..62f149aa0fb 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1873,6 +1873,34 @@ st_TextureView(struct gl_context *ctx,
return GL_TRUE;
}
+/**
+ * ClearTexSubImage entry of the dd_function_table: clear a 3D region of one
+ * texture image through pipe_context::clear_texture.
+ *
+ * \param texImage    image to clear; nothing is done if it has no resource
+ * \param xoffset,yoffset,zoffset  origin of the region within the image
+ * \param width,height,depth       extent of the region
+ * \param clearValue  value handed to clear_texture; when NULL a block of 16
+ *                    zero bytes is used instead
+ */
+static void
+st_ClearTexSubImage(struct gl_context *ctx,
+                    struct gl_texture_image *texImage,
+                    GLint xoffset, GLint yoffset, GLint zoffset,
+                    GLsizei width, GLsizei height, GLsizei depth,
+                    const GLvoid *clearValue)
+{
+   static const char zeros[16] = {0};
+   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct pipe_resource *pt = st_texture_image(texImage)->pt;
+   unsigned level = texImage->Level;
+   const GLvoid *value = clearValue;
+   struct pipe_box box;
+
+   /* No backing resource, nothing to clear. */
+   if (!pt)
+      return;
+
+   /* The face index is folded into the box's z origin. */
+   u_box_3d(xoffset, yoffset, zoffset + texImage->Face,
+            width, height, depth, &box);
+
+   /* Immutable textures additionally shift by the object's MinLevel and
+    * MinLayer.
+    */
+   if (texImage->TexObject->Immutable) {
+      level += texImage->TexObject->MinLevel;
+      box.z += texImage->TexObject->MinLayer;
+   }
+
+   /* A NULL clear value means "clear to zero". */
+   if (!value)
+      value = zeros;
+
+   pipe->clear_texture(pipe, pt, level, &box, value);
+}
+
void
st_init_texture_functions(struct dd_function_table *functions)
{
@@ -1904,4 +1932,5 @@ st_init_texture_functions(struct dd_function_table *functions)
functions->AllocTextureStorage = st_AllocTextureStorage;
functions->TextureView = st_TextureView;
+ functions->ClearTexSubImage = st_ClearTexSubImage;
}
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index bd7cbccc20c..99e96e1f3ae 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -254,6 +254,7 @@ void st_init_limits(struct pipe_screen *screen,
PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT);
options->LowerClipDistance = true;
+ options->LowerBufferInterfaceBlocks = true;
}
c->LowerTessLevel = true;
@@ -438,6 +439,7 @@ void st_init_extensions(struct pipe_screen *screen,
static const struct st_extension_cap_mapping cap_mapping[] = {
{ o(ARB_base_instance), PIPE_CAP_START_INSTANCE },
{ o(ARB_buffer_storage), PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT },
+ { o(ARB_clear_texture), PIPE_CAP_CLEAR_TEXTURE },
{ o(ARB_color_buffer_float), PIPE_CAP_VERTEX_COLOR_UNCLAMPED },
{ o(ARB_copy_image), PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS },
{ o(ARB_depth_clamp), PIPE_CAP_DEPTH_CLIP_DISABLE },
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index f481e8902d8..3ad1afdecda 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4408,6 +4408,7 @@ const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
TGSI_SEMANTIC_SAMPLEID,
TGSI_SEMANTIC_SAMPLEPOS,
TGSI_SEMANTIC_SAMPLEMASK,
+ TGSI_SEMANTIC_HELPER_INVOCATION,
/* Tessellation shaders
*/
@@ -5138,6 +5139,8 @@ st_translate_program(
TGSI_SEMANTIC_BASEVERTEX);
assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_TESS_COORD] ==
TGSI_SEMANTIC_TESSCOORD);
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_HELPER_INVOCATION] ==
+ TGSI_SEMANTIC_HELPER_INVOCATION);
t = CALLOC_STRUCT(st_translate);
if (!t) {
@@ -5822,7 +5825,6 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
(!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
(options->EmitNoSat ? SAT_TO_CLAMP : 0));
- lower_ubo_reference(prog->_LinkedShaders[i], ir);
do_vec_index_to_cond_assign(ir);
lower_vector_insert(ir, true);
lower_quadop_vector(ir, false);
diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index 7abd128e719..d0d261f4fde 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -623,6 +623,58 @@ st_context_destroy(struct st_context_iface *stctxi)
st_destroy_context(st);
}
+/**
+ * Debug callback handed to the pipe driver (see the set_debug_callback call
+ * in st_api_create_context).  Maps the pipe debug type onto a Mesa debug
+ * source/type/severity triple and forwards the message to _mesa_gl_vdebug().
+ *
+ * \param data   the st_context registered together with the callback
+ * \param id     message id, passed through to _mesa_gl_vdebug()
+ * \param ptype  category of the message as reported by the pipe driver
+ * \param fmt    printf-style format string
+ * \param args   arguments for \p fmt
+ */
+static void
+st_debug_message(void *data,
+                 unsigned *id,
+                 enum pipe_debug_type ptype,
+                 const char *fmt,
+                 va_list args)
+{
+   struct st_context *st = data;
+   enum mesa_debug_source source;
+   enum mesa_debug_type type;
+   enum mesa_debug_severity severity;
+
+   switch (ptype) {
+   case PIPE_DEBUG_TYPE_OUT_OF_MEMORY:
+      source = MESA_DEBUG_SOURCE_API;
+      type = MESA_DEBUG_TYPE_ERROR;
+      severity = MESA_DEBUG_SEVERITY_MEDIUM;
+      break;
+   case PIPE_DEBUG_TYPE_ERROR:
+      source = MESA_DEBUG_SOURCE_API;
+      type = MESA_DEBUG_TYPE_ERROR;
+      severity = MESA_DEBUG_SEVERITY_MEDIUM;
+      break;
+   case PIPE_DEBUG_TYPE_SHADER_INFO:
+      source = MESA_DEBUG_SOURCE_SHADER_COMPILER;
+      type = MESA_DEBUG_TYPE_OTHER;
+      severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+      break;
+   case PIPE_DEBUG_TYPE_PERF_INFO:
+      source = MESA_DEBUG_SOURCE_API;
+      type = MESA_DEBUG_TYPE_PERFORMANCE;
+      severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+      break;
+   case PIPE_DEBUG_TYPE_INFO:
+      source = MESA_DEBUG_SOURCE_API;
+      type = MESA_DEBUG_TYPE_OTHER;
+      severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+      break;
+   case PIPE_DEBUG_TYPE_FALLBACK:
+      source = MESA_DEBUG_SOURCE_API;
+      type = MESA_DEBUG_TYPE_PERFORMANCE;
+      severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+      break;
+   case PIPE_DEBUG_TYPE_CONFORMANCE:
+      source = MESA_DEBUG_SOURCE_API;
+      type = MESA_DEBUG_TYPE_OTHER;
+      severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+      break;
+   default:
+      /* A debug type this switch does not know about: report it as a
+       * generic notification instead of handing uninitialized values to
+       * _mesa_gl_vdebug().
+       */
+      source = MESA_DEBUG_SOURCE_API;
+      type = MESA_DEBUG_TYPE_OTHER;
+      severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+      break;
+   }
+   _mesa_gl_vdebug(st->ctx, id, source, type, severity, fmt, args);
+}
+
static struct st_context_iface *
st_api_create_context(struct st_api *stapi, struct st_manager *smapi,
const struct st_context_attribs *attribs,
@@ -677,6 +729,11 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi,
return NULL;
}
st->ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT;
+
+ if (pipe->set_debug_callback) {
+ struct pipe_debug_callback cb = { st_debug_message, st };
+ pipe->set_debug_callback(pipe, &cb);
+ }
}
if (attribs->flags & ST_CONTEXT_FLAG_FORWARD_COMPATIBLE)
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index a614b26cae4..7534599c313 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -114,6 +114,7 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
if (_mesa_inside_begin_end(exec->ctx)) {
exec->vtx.prim[0].mode = exec->ctx->Driver.CurrentExecPrimitive;
exec->vtx.prim[0].begin = 0;
+ exec->vtx.prim[0].end = 0;
exec->vtx.prim[0].start = 0;
exec->vtx.prim[0].count = 0;
exec->vtx.prim_count++;
@@ -846,17 +847,23 @@ static void GLAPIENTRY vbo_exec_End( void )
/* We're finishing drawing a line loop. Append 0th vertex onto
* end of vertex buffer so we can draw it as a line strip.
*/
- const fi_type *src = exec->vtx.buffer_map;
+ const fi_type *src = exec->vtx.buffer_map +
+ last_prim->start * exec->vtx.vertex_size;
fi_type *dst = exec->vtx.buffer_map +
exec->vtx.vert_count * exec->vtx.vertex_size;
/* copy 0th vertex to end of buffer */
memcpy(dst, src, exec->vtx.vertex_size * sizeof(fi_type));
- assert(last_prim->start == 0);
last_prim->start++; /* skip vertex0 */
/* note that last_prim->count stays unchanged */
last_prim->mode = GL_LINE_STRIP;
+
+ /* Increment the vertex count so the next primitive doesn't
+ * overwrite the last vertex which we just added.
+ */
+ exec->vtx.vert_count++;
+ exec->vtx.buffer_ptr += exec->vtx.vertex_size;
}
try_vbo_merge(exec);
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index ed5d9e947b0..0d42618f246 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -117,6 +117,7 @@ vbo_copy_vertices( struct vbo_exec_context *exec )
* subtract one from last_prim->start) so that we copy the 0th vertex
* to the next vertex buffer.
*/
+ assert(last_prim->start > 0);
src -= sz;
}
/* fall-through */