diff options
author | Jason Ekstrand <[email protected]> | 2015-11-23 14:03:47 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2015-11-23 14:03:47 -0800 |
commit | 179fc4aae8f782453f0488e8dd508f9a01117376 (patch) | |
tree | 5f0cc77b30d86b581fb968a71ba83c5e4c2546d7 /src/mesa | |
parent | e14b2c76b40398a61f45f5d058079641661a66cb (diff) | |
parent | d9b8fde963a53d4e06570d8bece97f806714507a (diff) |
Merge remote-tracking branch 'mesa-public/master' into vulkan
This pulls in nir cloning and some much-needed upstream refactors.
Diffstat (limited to 'src/mesa')
83 files changed, 2450 insertions, 1813 deletions
diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c index a9da0a21ba3..bde170fcf6f 100644 --- a/src/mesa/drivers/common/meta_generate_mipmap.c +++ b/src/mesa/drivers/common/meta_generate_mipmap.c @@ -102,13 +102,13 @@ fallback_required(struct gl_context *ctx, GLenum target, */ if (!mipmap->FBO) _mesa_GenFramebuffers(1, &mipmap->FBO); - _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, mipmap->FBO); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, mipmap->FBO); - _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, baseImage, 0); + _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, baseImage, 0); - status = _mesa_CheckFramebufferStatus(GL_FRAMEBUFFER_EXT); + status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); - _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, fboSave); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fboSave); if (status != GL_FRAMEBUFFER_COMPLETE_EXT) { _mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH, @@ -131,6 +131,11 @@ _mesa_meta_glsl_generate_mipmap_cleanup(struct gen_mipmap_state *mipmap) _mesa_DeleteSamplers(1, &mipmap->Sampler); mipmap->Sampler = 0; + if (mipmap->FBO != 0) { + _mesa_DeleteFramebuffers(1, &mipmap->FBO); + mipmap->FBO = 0; + } + _mesa_meta_blit_shader_table_cleanup(&mipmap->shaders); } diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c index b8ab480ddfe..a8f7c9b854b 100644 --- a/src/mesa/drivers/dri/common/xmlconfig.c +++ b/src/mesa/drivers/dri/common/xmlconfig.c @@ -59,6 +59,9 @@ extern char *program_invocation_name, *program_invocation_short_name; #elif defined(__NetBSD__) && defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 106000100) # include <stdlib.h> # define GET_PROGRAM_NAME() getprogname() +#elif defined(__DragonFly__) +# include <stdlib.h> +# define GET_PROGRAM_NAME() getprogname() #elif defined(__APPLE__) # include <stdlib.h> # define GET_PROGRAM_NAME() getprogname() diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index 7fa4ce87f18..b8990cef89e 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -85,7 +85,7 @@ brw_blorp_eu_emitter::emit_texture_lookup(const struct brw_reg &dst, unsigned msg_length) { fs_inst *inst = new (mem_ctx) fs_inst(op, 16, dst, brw_message_reg(base_mrf), - fs_reg(0u), fs_reg(0u)); + brw_imm_ud(0u), brw_imm_ud(0u)); inst->base_mrf = base_mrf; inst->mlen = msg_length; diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c index 40ad14402a7..73ba85e2a61 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_util.c +++ b/src/mesa/drivers/dri/i965/brw_clip_util.c @@ -224,7 +224,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, vec1(t_nopersp), brw_imm_f(0)); brw_IF(p, BRW_EXECUTE_1); - brw_MOV(p, t_nopersp, brw_imm_vf4(1, 0, 0, 0)); + brw_MOV(p, t_nopersp, brw_imm_vf4(brw_float_to_vf(1.0), + brw_float_to_vf(0.0), + brw_float_to_vf(0.0), + brw_float_to_vf(0.0))); brw_ENDIF(p); /* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it. */ diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index cd78af0dce4..e49994f19a8 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -23,6 +23,7 @@ #pragma once +#include <stdio.h> #include "brw_device_info.h" #include "main/mtypes.h" @@ -89,8 +90,7 @@ struct brw_compiler { void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); - bool scalar_vs; - bool scalar_gs; + bool scalar_stage[MESA_SHADER_STAGES]; struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES]; }; @@ -493,6 +493,34 @@ enum shader_dispatch_mode { DISPATCH_MODE_SIMD8 = 3, }; +/** + * @defgroup Tessellator parameter enumerations. + * + * These correspond to the hardware values in 3DSTATE_TE, and are provided + * as part of the tessellation evaluation shader. + * + * @{ + */ +enum brw_tess_partitioning { + BRW_TESS_PARTITIONING_INTEGER = 0, + BRW_TESS_PARTITIONING_ODD_FRACTIONAL = 1, + BRW_TESS_PARTITIONING_EVEN_FRACTIONAL = 2, +}; + +enum brw_tess_output_topology { + BRW_TESS_OUTPUT_TOPOLOGY_POINT = 0, + BRW_TESS_OUTPUT_TOPOLOGY_LINE = 1, + BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW = 2, + BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW = 3, +}; + +enum brw_tess_domain { + BRW_TESS_DOMAIN_QUAD = 0, + BRW_TESS_DOMAIN_TRI = 1, + BRW_TESS_DOMAIN_ISOLINE = 2, +}; +/** @} */ + struct brw_vue_prog_data { struct brw_stage_prog_data base; struct brw_vue_map vue_map; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index ac6045dbba9..2ea0a9eca92 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -322,64 +322,82 @@ static void brw_initialize_context_constants(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; + const struct brw_compiler *compiler = brw->intelScreen->compiler; + + const bool stage_exists[MESA_SHADER_STAGES] = { + [MESA_SHADER_VERTEX] = true, + [MESA_SHADER_TESS_CTRL] = false, + [MESA_SHADER_TESS_EVAL] = false, + [MESA_SHADER_GEOMETRY] = brw->gen >= 6, + [MESA_SHADER_FRAGMENT] = true, + [MESA_SHADER_COMPUTE] = _mesa_extension_override_enables.ARB_compute_shader, + }; + + unsigned num_stages = 0; + for (int i = 0; i < MESA_SHADER_STAGES; i++) { + if (stage_exists[i]) + num_stages++; + } unsigned max_samplers = brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16; + ctx->Const.MaxDualSourceDrawBuffers = 1; + ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; + ctx->Const.MaxCombinedShaderOutputResources = + MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS; + ctx->Const.QueryCounterBits.Timestamp = 36; + ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ + ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; + ctx->Const.MaxRenderbufferSize = 8192; + ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS); + ctx->Const.Max3DTextureLevels = 12; /* 2048 */ + ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */ + ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512; + ctx->Const.MaxTextureMbytes = 1536; + ctx->Const.MaxTextureRectSize = 1 << 12; + ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.StripTextureBorder = true; + if (brw->gen >= 7) + ctx->Const.MaxProgramTextureGatherComponents = 4; + else if (brw->gen == 6) + ctx->Const.MaxProgramTextureGatherComponents = 1; ctx->Const.MaxUniformBlockSize = 65536; + for (int i = 0; i < MESA_SHADER_STAGES; i++) { struct gl_program_constants *prog = &ctx->Const.Program[i]; + + if (!stage_exists[i]) + continue; + + prog->MaxTextureImageUnits = max_samplers; + prog->MaxUniformBlocks = BRW_MAX_UBO; prog->MaxCombinedUniformComponents = prog->MaxUniformComponents + ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks; + + prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS; + prog->MaxAtomicBuffers = BRW_MAX_ABO; + prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0; + prog->MaxShaderStorageBlocks = BRW_MAX_SSBO; } - ctx->Const.MaxDualSourceDrawBuffers = 1; - ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers; - ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits, ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits); - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers; - if (brw->gen >= 6) - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers; - else - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0; - if (_mesa_extension_override_enables.ARB_compute_shader) { - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT; - ctx->Const.MaxUniformBufferBindings += BRW_MAX_UBO; - } else { - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0; - } - ctx->Const.MaxCombinedTextureImageUnits = - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits + - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits + - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits + - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; - - ctx->Const.MaxTextureLevels = 14; /* 8192 */ - if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS) - ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS; - ctx->Const.Max3DTextureLevels = 12; /* 2048 */ - ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */ - ctx->Const.MaxTextureMbytes = 1536; - - if (brw->gen >= 7) - ctx->Const.MaxArrayTextureLayers = 2048; - else - ctx->Const.MaxArrayTextureLayers = 512; - ctx->Const.MaxTextureRectSize = 1 << 12; + ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO; + ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO; + ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO; + ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO; + ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO; + ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers; + ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES; - ctx->Const.MaxTextureMaxAnisotropy = 16.0; - - ctx->Const.MaxRenderbufferSize = 8192; /* Hardware only supports a limited number of transform feedback buffers. * So we need to override the Mesa default (which is based only on software @@ -427,6 +445,7 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MaxColorTextureSamples = max_samples; ctx->Const.MaxDepthTextureSamples = max_samples; ctx->Const.MaxIntegerSamples = max_samples; + ctx->Const.MaxImageSamples = 0; /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used * to map indices of rectangular grid to sample numbers within a pixel. @@ -436,11 +455,6 @@ brw_initialize_context_constants(struct brw_context *brw) */ gen6_set_sample_maps(ctx); - if (brw->gen >= 7) - ctx->Const.MaxProgramTextureGatherComponents = 4; - else if (brw->gen == 6) - ctx->Const.MaxProgramTextureGatherComponents = 1; - ctx->Const.MinLineWidth = 1.0; ctx->Const.MinLineWidthAA = 1.0; if (brw->gen >= 6) { @@ -511,30 +525,6 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; - if (brw->gen >= 7) { - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO; - ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO; - - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms = - BRW_MAX_IMAGES; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms = - (brw->intelScreen->compiler->scalar_vs ? BRW_MAX_IMAGES : 0); - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms = - BRW_MAX_IMAGES; - ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; - ctx->Const.MaxCombinedShaderOutputResources = - MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS; - ctx->Const.MaxImageSamples = 0; - ctx->Const.MaxCombinedImageUniforms = 3 * BRW_MAX_IMAGES; - } - /* Gen6 converts quads to polygon in beginning of 3D pipeline, * but we're not sure how it's actually done for vertex order, * that affect provoking vertex decision. Always use last vertex @@ -586,21 +576,6 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.TextureBufferOffsetAlignment = 16; ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024; - /* FIXME: Tessellation stages are not yet supported in i965, so - * MaxCombinedShaderStorageBlocks doesn't take them into account. - */ - ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = BRW_MAX_SSBO; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = BRW_MAX_SSBO; - ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks = 0; - ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks = 0; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = BRW_MAX_SSBO; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = BRW_MAX_SSBO; - ctx->Const.MaxCombinedShaderStorageBlocks = BRW_MAX_SSBO * 3; - ctx->Const.MaxShaderStorageBufferBindings = BRW_MAX_SSBO * 3; - - if (_mesa_extension_override_enables.ARB_compute_shader) - ctx->Const.MaxShaderStorageBufferBindings += BRW_MAX_SSBO; - if (brw->gen >= 6) { ctx->Const.MaxVarying = 32; ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 4b2db61c758..fe45edb89ff 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -523,6 +523,8 @@ struct brw_tracked_state { enum shader_time_shader_type { ST_NONE, ST_VS, + ST_TCS, + ST_TES, ST_GS, ST_FS8, ST_FS16, @@ -1465,6 +1467,8 @@ void brw_upload_image_surfaces(struct brw_context *brw, /* brw_surface_formats.c */ bool brw_render_target_supported(struct brw_context *brw, struct gl_renderbuffer *rb); +bool brw_losslessly_compressible_format(struct brw_context *brw, + uint32_t brw_format); uint32_t brw_depth_format(struct brw_context *brw, mesa_format format); mesa_format brw_lower_mesa_image_format(const struct brw_device_info *devinfo, mesa_format format); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 3ad90da8b2f..36d9f716e03 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1291,6 +1291,16 @@ enum opcode { * Calculate the high 32-bits of a 32x32 multiply. */ SHADER_OPCODE_MULH, + + /** + * A MOV that uses VxH indirect addressing. + * + * Source 0: A register to start from (HW_REG). + * Source 1: An indirect offset (in bytes, UD GRF). + * Source 2: The length of the region that could be accessed (in bytes, + * UD immediate). + */ + SHADER_OPCODE_MOV_INDIRECT, }; enum brw_urb_write_flags { @@ -1930,8 +1940,14 @@ enum brw_message_target { /* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size * is 2^9, or 512. It's counted in multiples of 64 bytes. + * + * Identical for VS, DS, and HS. */ #define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64) +#define GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES (512*64) +#define GEN7_MAX_HS_URB_ENTRY_SIZE_BYTES (512*64) +#define GEN7_MAX_VS_URB_ENTRY_SIZE_BYTES (512*64) + /* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit * (128 bytes) URB rows and the maximum allowed value is 5 rows. */ diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 250d4097e38..419168966de 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -43,6 +43,7 @@ #include "brw_wm.h" #include "brw_fs.h" #include "brw_cs.h" +#include "brw_nir.h" #include "brw_vec4_gs_visitor.h" #include "brw_cfg.h" #include "brw_dead_control_flow.h" @@ -186,7 +187,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, * the redundant ones. */ fs_reg vec4_offset = vgrf(glsl_type::int_type); - bld.ADD(vec4_offset, varying_offset, fs_reg(const_offset & ~3)); + bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~3)); int scale = 1; if (devinfo->gen == 4 && bld.dispatch_width() == 8) { @@ -374,54 +375,6 @@ fs_reg::fs_reg() this->file = BAD_FILE; } -/** Immediate value constructor. */ -fs_reg::fs_reg(float f) -{ - init(); - this->file = IMM; - this->type = BRW_REGISTER_TYPE_F; - this->stride = 0; - this->f = f; -} - -/** Immediate value constructor. */ -fs_reg::fs_reg(int32_t i) -{ - init(); - this->file = IMM; - this->type = BRW_REGISTER_TYPE_D; - this->stride = 0; - this->d = i; -} - -/** Immediate value constructor. */ -fs_reg::fs_reg(uint32_t u) -{ - init(); - this->file = IMM; - this->type = BRW_REGISTER_TYPE_UD; - this->stride = 0; - this->ud = u; -} - -/** Vector float immediate value constructor. */ -fs_reg::fs_reg(uint8_t vf[4]) -{ - init(); - this->file = IMM; - this->type = BRW_REGISTER_TYPE_VF; - memcpy(&this->ud, vf, sizeof(unsigned)); -} - -/** Vector float immediate value constructor. */ -fs_reg::fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3) -{ - init(); - this->file = IMM; - this->type = BRW_REGISTER_TYPE_VF; - this->ud = (vf0 << 0) | (vf1 << 8) | (vf2 << 16) | (vf3 << 24); -} - fs_reg::fs_reg(struct brw_reg reg) : backend_reg(reg) { @@ -591,7 +544,7 @@ fs_visitor::emit_shader_time_end() fs_reg reset = shader_end_time; reset.set_smear(2); set_condmod(BRW_CONDITIONAL_Z, - ibld.AND(ibld.null_reg_ud(), reset, fs_reg(1u))); + ibld.AND(ibld.null_reg_ud(), reset, brw_imm_ud(1u))); ibld.IF(BRW_PREDICATE_NORMAL); fs_reg start = shader_start_time; @@ -606,11 +559,11 @@ fs_visitor::emit_shader_time_end() * is 2 cycles. Remove that overhead, so I can forget about that when * trying to determine the time taken for single instructions. */ - cbld.ADD(diff, diff, fs_reg(-2u)); + cbld.ADD(diff, diff, brw_imm_ud(-2u)); SHADER_TIME_ADD(cbld, 0, diff); - SHADER_TIME_ADD(cbld, 1, fs_reg(1u)); + SHADER_TIME_ADD(cbld, 1, brw_imm_ud(1u)); ibld.emit(BRW_OPCODE_ELSE); - SHADER_TIME_ADD(cbld, 2, fs_reg(1u)); + SHADER_TIME_ADD(cbld, 2, brw_imm_ud(1u)); ibld.emit(BRW_OPCODE_ENDIF); } @@ -620,7 +573,7 @@ fs_visitor::SHADER_TIME_ADD(const fs_builder &bld, fs_reg value) { int index = shader_time_index * 3 + shader_time_subindex; - fs_reg offset = fs_reg(index * SHADER_TIME_STRIDE); + struct brw_reg offset = brw_imm_d(index * SHADER_TIME_STRIDE); fs_reg payload; if (dispatch_width == 8) @@ -841,6 +794,34 @@ fs_inst::regs_read(int arg) const case SHADER_OPCODE_BARRIER: return 1; + case SHADER_OPCODE_MOV_INDIRECT: + if (arg == 0) { + assert(src[2].file == IMM); + unsigned region_length = src[2].ud; + + if (src[0].file == FIXED_GRF) { + /* If the start of the region is not register aligned, then + * there's some portion of the register that's technically + * unread at the beginning. + * + * However, the register allocator works in terms of whole + * registers, and does not use subnr. It assumes that the + * read starts at the beginning of the register, and extends + * regs_read() whole registers beyond that. + * + * To compensate, we extend the region length to include this + * unread portion at the beginning. + */ + if (src[0].subnr) + region_length += src[0].subnr * type_sz(src[0].type); + + return DIV_ROUND_UP(region_length, REG_SIZE); + } else { + assert(!"Invalid register file"); + } + } + break; + default: if (is_tex() && arg == 0 && src[0].file == VGRF) return mlen; @@ -1005,7 +986,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, if (pixel_center_integer) { bld.MOV(wpos, this->pixel_x); } else { - bld.ADD(wpos, this->pixel_x, fs_reg(0.5f)); + bld.ADD(wpos, this->pixel_x, brw_imm_f(0.5f)); } wpos = offset(wpos, bld, 1); @@ -1021,7 +1002,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, offset += key->drawable_height - 1.0f; } - bld.ADD(wpos, pixel_y, fs_reg(offset)); + bld.ADD(wpos, pixel_y, brw_imm_f(offset)); } wpos = offset(wpos, bld, 1); @@ -1198,7 +1179,7 @@ fs_visitor::emit_frontfacing_interpolation() fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W)); g0.negate = true; - bld.ASR(*reg, g0, fs_reg(15)); + bld.ASR(*reg, g0, brw_imm_d(15)); } else { /* Bit 31 of g1.6 is 0 if the polygon is front facing. We want to create * a boolean result from this (1/true or 0/false). @@ -1213,7 +1194,7 @@ fs_visitor::emit_frontfacing_interpolation() fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D)); g1_6.negate = true; - bld.ASR(*reg, g1_6, fs_reg(31)); + bld.ASR(*reg, g1_6, brw_imm_d(31)); } return reg; @@ -1230,7 +1211,7 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos) /* Convert int_sample_pos to floating point */ bld.MOV(dst, int_sample_pos); /* Scale to the range [0, 1] */ - bld.MUL(dst, dst, fs_reg(1 / 16.0f)); + bld.MUL(dst, dst, brw_imm_f(1 / 16.0f)); } else { /* From ARB_sample_shading specification: @@ -1238,7 +1219,7 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos) * rasterization is disabled, gl_SamplePosition will always be * (0.5, 0.5). */ - bld.MOV(dst, fs_reg(0.5f)); + bld.MOV(dst, brw_imm_f(0.5f)); } } @@ -1333,8 +1314,8 @@ fs_visitor::emit_sampleid_setup() abld.exec_all().group(1, 0) .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), - fs_reg(sspi_mask)); - abld.exec_all().group(1, 0).SHR(t1, t1, fs_reg(5)); + brw_imm_ud(sspi_mask)); + abld.exec_all().group(1, 0).SHR(t1, t1, brw_imm_d(5)); /* This works for both SIMD8 and SIMD16 */ abld.exec_all().group(4, 0) @@ -1349,7 +1330,7 @@ fs_visitor::emit_sampleid_setup() * "When rendering to a non-multisample buffer, or if multisample * rasterization is disabled, gl_SampleID will always be zero." */ - abld.MOV(*reg, fs_reg(0)); + abld.MOV(*reg, brw_imm_d(0)); } return reg; @@ -1662,24 +1643,7 @@ fs_visitor::assign_gs_urb_setup() first_non_payload_grf += 8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in; - const unsigned first_icp_handle = payload.num_regs - - (vue_prog_data->include_vue_handles ? nir->info.gs.vertices_in : 0); - foreach_block_and_inst(block, fs_inst, inst, cfg) { - /* Lower URB_READ_SIMD8 opcodes into real messages. */ - if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8) { - assert(inst->src[0].file == IMM); - inst->src[0] = retype(brw_vec8_grf(first_icp_handle + - inst->src[0].ud, - 0), BRW_REGISTER_TYPE_UD); - /* for now, assume constant - we can do per-slot offsets later */ - assert(inst->src[1].file == IMM); - inst->offset = inst->src[1].ud; - inst->src[1] = fs_reg(); - inst->mlen = 1; - inst->base_mrf = -1; - } - /* Rewrite all ATTR file references to GRFs. */ convert_attr_sources_to_hw_regs(inst); } @@ -2037,16 +2001,16 @@ fs_visitor::demote_pull_constants() /* Generate a pull load into dst. */ if (inst->src[i].reladdr) { VARYING_PULL_CONSTANT_LOAD(ibld, dst, - fs_reg(index), + brw_imm_ud(index), *inst->src[i].reladdr, pull_index); inst->src[i].reladdr = NULL; inst->src[i].stride = 1; } else { const fs_builder ubld = ibld.exec_all().group(8, 0); - fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15); + struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15); ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - dst, fs_reg(index), offset); + dst, brw_imm_ud(index), offset); inst->src[i].set_smear(pull_index & 3); } brw_mark_surface_used(prog_data, index); @@ -2738,7 +2702,7 @@ fs_visitor::eliminate_find_live_channel() case SHADER_OPCODE_FIND_LIVE_CHANNEL: if (depth == 0) { inst->opcode = BRW_OPCODE_MOV; - inst->src[0] = fs_reg(0u); + inst->src[0] = brw_imm_ud(0u); inst->sources = 1; inst->force_writemask_all = true; progress = true; @@ -3591,6 +3555,12 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, assert(devinfo->gen >= 9); assert(bld.dispatch_width() != 16); + /* XXX: src_stencil is only available on gen9+. dst_depth is never + * available on gen9+. As such it's impossible to have both enabled at the + * same time and therefore length cannot overrun the array. + */ + assert(length < 15); + sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD); bld.exec_all().annotate("FB write OS") .emit(FS_OPCODE_PACK_STENCIL_REF, sources[length], @@ -3660,7 +3630,7 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op, (has_lod || shadow_c.file != BAD_FILE || (op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8))) { for (unsigned i = coord_components; i < 3; i++) - bld.MOV(offset(msg_end, bld, i), fs_reg(0.0f)); + bld.MOV(offset(msg_end, bld, i), brw_imm_f(0.0f)); msg_end = offset(msg_end, bld, 3 - coord_components); } @@ -3717,7 +3687,7 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op, /* There's no plain shadow compare message, so we use shadow * compare with a bias of 0.0. */ - bld.MOV(msg_end, fs_reg(0.0f)); + bld.MOV(msg_end, brw_imm_f(0.0f)); msg_end = offset(msg_end, bld, 1); } @@ -3813,7 +3783,7 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op, case SHADER_OPCODE_TXF_CMS: msg_lod = offset(msg_coords, bld, 3); /* lod */ - bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)); + bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u)); /* sample index */ bld.MOV(retype(offset(msg_lod, bld, 1), BRW_REGISTER_TYPE_UD), sample_index); msg_end = offset(msg_lod, bld, 2); @@ -3896,7 +3866,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, if (bld.shader->stage != MESA_SHADER_FRAGMENT && op == SHADER_OPCODE_TEX) { op = SHADER_OPCODE_TXL; - lod = fs_reg(0.0f); + lod = brw_imm_f(0.0f); } /* Set up the LOD info */ @@ -4110,7 +4080,7 @@ emit_surface_header(const fs_builder &bld, const fs_reg &sample_mask) { fs_builder ubld = bld.exec_all().group(8, 0); const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD); - ubld.MOV(dst, fs_reg(0)); + ubld.MOV(dst, brw_imm_d(0)); ubld.MOV(component(dst, 7), sample_mask); return dst; } @@ -4252,7 +4222,7 @@ fs_visitor::lower_logical_sends() case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: lower_surface_logical_send(ibld, inst, SHADER_OPCODE_TYPED_SURFACE_READ, - fs_reg(0xffff)); + brw_imm_d(0xffff)); break; case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: @@ -4677,6 +4647,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) case IMM: unreachable("not reached"); } + if (inst->dst.stride != 1) + fprintf(file, "<%u>", inst->dst.stride); fprintf(file, ":%s, ", brw_reg_type_letters(inst->dst.type)); for (int i = 0; i < inst->sources; i++) { @@ -4764,6 +4736,16 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "|"); if (inst->src[i].file != IMM) { + unsigned stride; + if (inst->src[i].file == ARF || inst->src[i].file == FIXED_GRF) { + unsigned hstride = inst->src[i].hstride; + stride = (hstride == 0 ? 0 : (1 << (hstride - 1))); + } else { + stride = inst->src[i].stride; + } + if (stride != 1) + fprintf(file, "<%u>", stride); + fprintf(file, ":%s", brw_reg_type_letters(inst->src[i].type)); } @@ -5241,7 +5223,7 @@ fs_visitor::run_gs() */ if (gs_compile->control_data_header_size_bits <= 32) { const fs_builder abld = bld.annotate("initialize control data bits"); - abld.MOV(this->control_data_bits, fs_reg(0u)); + abld.MOV(this->control_data_bits, brw_imm_ud(0u)); } } @@ -5474,13 +5456,18 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const struct brw_wm_prog_key *key, struct brw_wm_prog_data *prog_data, - const nir_shader *shader, + const nir_shader *src_shader, struct gl_program *prog, int shader_time_index8, int shader_time_index16, bool use_rep_send, unsigned *final_assembly_size, char **error_str) { + nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); + shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, + true); + shader = brw_postprocess_nir(shader, compiler->devinfo, true); + /* key->alpha_test_func means simulating alpha testing via discards, * so the shader definitely kills pixels. */ @@ -5633,11 +5620,16 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const struct brw_cs_prog_key *key, struct brw_cs_prog_data *prog_data, - const nir_shader *shader, + const nir_shader *src_shader, int shader_time_index, unsigned *final_assembly_size, char **error_str) { + nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); + shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, + true); + shader = brw_postprocess_nir(shader, compiler->devinfo, true); + prog_data->local_size[0] = shader->info.cs.local_size[0]; prog_data->local_size[1] = shader->info.cs.local_size[1]; prog_data->local_size[2] = shader->info.cs.local_size[2]; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 9b56afd292f..658608f9951 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -116,10 +116,6 @@ public: void setup_uniform_clipplane_values(gl_clip_plane *clip_planes); void compute_clip_distance(gl_clip_plane *clip_planes); - uint32_t gather_channel(int orig_chan, uint32_t surface, uint32_t sampler); - void swizzle_result(ir_texture_opcode op, int dest_components, - fs_reg orig_val, uint32_t sampler); - fs_inst *get_instruction_generating_reg(fs_inst *start, fs_inst *end, const fs_reg ®); @@ -218,8 +214,6 @@ public: void emit_interpolation_setup_gen4(); void emit_interpolation_setup_gen6(); void compute_sample_position(fs_reg dst, fs_reg int_sample_pos); - fs_reg rescale_texcoord(fs_reg coordinate, int coord_components, - bool is_rect, uint32_t sampler); void emit_texture(ir_texture_opcode op, const glsl_type *dest_type, fs_reg coordinate, int components, @@ -230,7 +224,6 @@ public: fs_reg mcs, int gather_component, bool is_cube_array, - bool is_rect, uint32_t surface, fs_reg surface_reg, uint32_t sampler, @@ -305,7 +298,8 @@ public: unsigned stream_id); void emit_gs_thread_end(); void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src, - unsigned offset, unsigned num_components); + const fs_reg &indirect_offset, unsigned imm_offset, + unsigned num_components); void emit_cs_terminate(); fs_reg *emit_cs_local_invocation_id_setup(); fs_reg *emit_cs_work_group_id_setup(); @@ -530,6 +524,11 @@ private: struct brw_reg offset, struct brw_reg value); + void generate_mov_indirect(fs_inst *inst, + struct brw_reg dst, + struct brw_reg reg, + struct brw_reg indirect_byte_offset); + bool patch_discard_jumps_to_fb_writes(); const struct brw_compiler *compiler; diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h index 22b2f22073f..dd3c383a17d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -225,7 +225,7 @@ namespace brw { sample_mask_reg() const { if (shader->stage != MESA_SHADER_FRAGMENT) { - return src_reg(0xffff); + return brw_imm_d(0xffff); } else if (((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill) { return brw_flag_reg(0, 1); } else { @@ -548,7 +548,7 @@ namespace brw { const dst_reg x_times_one_minus_a = vgrf(dst.type); MUL(y_times_a, y, a); - ADD(one_minus_a, negate(a), src_reg(1.0f)); + ADD(one_minus_a, negate(a), brw_imm_f(1.0f)); MUL(x_times_one_minus_a, x, src_reg(one_minus_a)); return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp index 8fdc959f992..7c01f1e3d62 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp @@ -90,7 +90,8 @@ opt_cmod_propagation_local(bblock_t *block) foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (scan_inst->overwrites_reg(inst->src[0])) { if (scan_inst->is_partial_write() || - scan_inst->dst.reg_offset != inst->src[0].reg_offset) + scan_inst->dst.reg_offset != inst->src[0].reg_offset || + scan_inst->exec_size != inst->exec_size) break; /* CMP's result is the same regardless of dest type. */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp index 0c115f50748..c3ad7ad4771 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp @@ -279,7 +279,7 @@ fs_visitor::opt_combine_constants() imm->block->last_non_control_flow_inst()->next); const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, 0); - ibld.MOV(reg, fs_reg(imm->val)); + ibld.MOV(reg, brw_imm_f(imm->val)); imm->nr = reg.nr; imm->subreg_offset = reg.subreg_offset; diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 426ea57d8f9..62ae9abede7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -275,6 +275,59 @@ is_logic_op(enum opcode opcode) opcode == BRW_OPCODE_NOT); } +static bool +can_take_stride(fs_inst *inst, unsigned arg, unsigned stride, + const brw_device_info *devinfo) +{ + if (stride > 4) + return false; + + /* 3-source instructions can only be Align16, which restricts what strides + * they can take. They can only take a stride of 1 (the usual case), or 0 + * with a special "repctrl" bit. But the repctrl bit doesn't work for + * 64-bit datatypes, so if the source type is 64-bit then only a stride of + * 1 is allowed. From the Broadwell PRM, Volume 7 "3D Media GPGPU", page + * 944: + * + * This is applicable to 32b datatypes and 16b datatype. 64b datatypes + * cannot use the replicate control. + */ + if (inst->is_3src()) { + if (type_sz(inst->src[arg].type) > 4) + return stride == 1; + else + return stride == 1 || stride == 0; + } + + /* From the Broadwell PRM, Volume 2a "Command Reference - Instructions", + * page 391 ("Extended Math Function"): + * + * The following restrictions apply for align1 mode: Scalar source is + * supported. Source and destination horizontal stride must be the + * same. + * + * From the Haswell PRM Volume 2b "Command Reference - Instructions", page + * 134 ("Extended Math Function"): + * + * Scalar source is supported. Source and destination horizontal stride + * must be 1. + * + * and similar language exists for IVB and SNB. Pre-SNB, math instructions + * are sends, so the sources are moved to MRF's and there are no + * restrictions. + */ + if (inst->is_math()) { + if (devinfo->gen == 6 || devinfo->gen == 7) { + assert(inst->dst.stride == 1); + return stride == 1 || stride == 0; + } else if (devinfo->gen >= 8) { + return stride == inst->dst.stride || stride == 0; + } + } + + return true; +} + bool fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) { @@ -326,7 +379,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) /* Bail if the result of composing both strides would exceed the * hardware limit. */ - if (entry->src.stride * inst->src[arg].stride > 4) + if (!can_take_stride(inst, arg, entry->src.stride * inst->src[arg].stride, + devinfo)) return false; /* Bail if the instruction type is larger than the execution type of the diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 8c67caff6e0..3b65a382dc8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -78,6 +78,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case FS_OPCODE_LINTERP: case SHADER_OPCODE_FIND_LIVE_CHANNEL: case SHADER_OPCODE_BROADCAST: + case SHADER_OPCODE_MOV_INDIRECT: return true; case SHADER_OPCODE_RCP: case SHADER_OPCODE_RSQ: @@ -209,6 +210,8 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size); } else { copy = bld.MOV(inst->dst, src); + copy->force_sechalf = inst->force_sechalf; + copy->force_writemask_all = inst->force_writemask_all; copy->src[0].negate = negate; } assert(copy->regs_written == written); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 9d7fb94c397..8528f391941 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -89,39 +89,9 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen) brw_reg.abs = reg->abs; brw_reg.negate = reg->negate; break; - case IMM: - assert(reg->stride == ((reg->type == BRW_REGISTER_TYPE_V || - reg->type == BRW_REGISTER_TYPE_UV || - reg->type == BRW_REGISTER_TYPE_VF) ? 1 : 0)); - - switch (reg->type) { - case BRW_REGISTER_TYPE_F: - brw_reg = brw_imm_f(reg->f); - break; - case BRW_REGISTER_TYPE_D: - brw_reg = brw_imm_d(reg->d); - break; - case BRW_REGISTER_TYPE_UD: - brw_reg = brw_imm_ud(reg->ud); - break; - case BRW_REGISTER_TYPE_W: - brw_reg = brw_imm_w(reg->d); - break; - case BRW_REGISTER_TYPE_UW: - brw_reg = brw_imm_uw(reg->ud); - break; - case BRW_REGISTER_TYPE_VF: - brw_reg = brw_imm_vf(reg->ud); - break; - case BRW_REGISTER_TYPE_V: - brw_reg = brw_imm_v(reg->ud); - break; - default: - unreachable("not reached"); - } - break; case ARF: case FIXED_GRF: + case IMM: brw_reg = *static_cast<struct brw_reg *>(reg); break; case BAD_FILE: @@ -372,6 +342,36 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) } void +fs_generator::generate_mov_indirect(fs_inst *inst, + struct brw_reg dst, + struct brw_reg reg, + struct brw_reg indirect_byte_offset) +{ + assert(indirect_byte_offset.type == BRW_REGISTER_TYPE_UD); + assert(indirect_byte_offset.file == BRW_GENERAL_REGISTER_FILE); + + unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr; + + /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ + struct brw_reg addr = vec8(brw_address_reg(0)); + + /* The destination stride of an instruction (in bytes) must be greater + * than or equal to the size of the rest of the instruction. Since the + * address register is of type UW, we can't use a D-type instruction. + * In order to get around this, re re-type to UW and use a stride. + */ + indirect_byte_offset = + retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW); + + /* Prior to Broadwell, there are only 8 address registers. */ + assert(inst->exec_size == 8 || devinfo->gen >= 8); + + brw_MOV(p, addr, indirect_byte_offset); + brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE); + brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type)); +} + +void fs_generator::generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg header) @@ -700,6 +700,17 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src break; } + /* Stomp the resinfo output type to UINT32. On gens 4-5, the output type + * is set as part of the message descriptor. On gen4, the PRM seems to + * allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on + * later gens UINT32 is required. Once you hit Sandy Bridge, the bit is + * gone from the message descriptor entirely and you just get UINT32 all + * the time regasrdless. Since we can really only do non-UINT32 on gen4, + * just stomp it to UINT32 all the time. + */ + if (inst->opcode == SHADER_OPCODE_TXS) + return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32; + switch (inst->exec_size) { case 8: simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; @@ -2087,6 +2098,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) fill_count++; break; + case SHADER_OPCODE_MOV_INDIRECT: + generate_mov_indirect(inst, dst, src[0], src[1]); + break; + case SHADER_OPCODE_URB_READ_SIMD8: case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: generate_urb_read(inst, dst, src[0]); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 3a666b8debc..6b0c4a5b36e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -208,7 +208,7 @@ emit_system_values_block(nir_block *block, void *void_visitor) const fs_builder abld = v->bld.annotate("gl_InvocationID", NULL); fs_reg g1(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); fs_reg iid = abld.vgrf(BRW_REGISTER_TYPE_UD, 1); - abld.SHR(iid, g1, fs_reg(27u)); + abld.SHR(iid, g1, brw_imm_ud(27u)); *reg = iid; } break; @@ -250,6 +250,57 @@ emit_system_values_block(nir_block *block, void *void_visitor) *reg = *v->emit_cs_work_group_id_setup(); break; + case nir_intrinsic_load_helper_invocation: + assert(v->stage == MESA_SHADER_FRAGMENT); + reg = &v->nir_system_values[SYSTEM_VALUE_HELPER_INVOCATION]; + if (reg->file == BAD_FILE) { + const fs_builder abld = + v->bld.annotate("gl_HelperInvocation", NULL); + + /* On Gen6+ (gl_HelperInvocation is only exposed on Gen7+) the + * pixel mask is in g1.7 of the thread payload. + * + * We move the per-channel pixel enable bit to the low bit of each + * channel by shifting the byte containing the pixel mask by the + * vector immediate 0x76543210UV. + * + * The region of <1,8,0> reads only 1 byte (the pixel masks for + * subspans 0 and 1) in SIMD8 and an additional byte (the pixel + * masks for 2 and 3) in SIMD16. + */ + fs_reg shifted = abld.vgrf(BRW_REGISTER_TYPE_UW, 1); + abld.SHR(shifted, + stride(byte_offset(retype(brw_vec1_grf(1, 0), + BRW_REGISTER_TYPE_UB), 28), + 1, 8, 0), + brw_imm_uv(0x76543210)); + + /* A set bit in the pixel mask means the channel is enabled, but + * that is the opposite of gl_HelperInvocation so we need to invert + * the mask. + * + * The negate source-modifier bit of logical instructions on Gen8+ + * performs 1's complement negation, so we can use that instead of + * a NOT instruction. + */ + fs_reg inverted = negate(shifted); + if (v->devinfo->gen < 8) { + inverted = abld.vgrf(BRW_REGISTER_TYPE_UW); + abld.NOT(inverted, shifted); + } + + /* We then resolve the 0/1 result to 0/~0 boolean values by ANDing + * with 1 and negating. + */ + fs_reg anded = abld.vgrf(BRW_REGISTER_TYPE_UD, 1); + abld.AND(anded, inverted, brw_imm_uw(1)); + + fs_reg dst = abld.vgrf(BRW_REGISTER_TYPE_D, 1); + abld.MOV(dst, negate(retype(anded, BRW_REGISTER_TYPE_D))); + *reg = dst; + } + break; + default: break; } @@ -454,8 +505,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, tmp.subreg_offset = 2; tmp.stride = 2; - fs_inst *or_inst = bld.OR(tmp, g0, fs_reg(0x3f80)); - or_inst->src[1].type = BRW_REGISTER_TYPE_UW; + bld.OR(tmp, g0, brw_imm_uw(0x3f80)); tmp.type = BRW_REGISTER_TYPE_D; tmp.subreg_offset = 0; @@ -479,9 +529,9 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, g1_6.negate = true; } - bld.OR(tmp, g1_6, fs_reg(0x3f800000)); + bld.OR(tmp, g1_6, brw_imm_d(0x3f800000)); } - bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf800000)); + bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, brw_imm_d(0xbf800000)); return true; } @@ -594,14 +644,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not * zero. */ - bld.CMP(bld.null_reg_f(), op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ); + bld.CMP(bld.null_reg_f(), op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ); fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD); op[0].type = BRW_REGISTER_TYPE_UD; result.type = BRW_REGISTER_TYPE_UD; - bld.AND(result_int, op[0], fs_reg(0x80000000u)); + bld.AND(result_int, op[0], brw_imm_ud(0x80000000u)); - inst = bld.OR(result_int, result_int, fs_reg(0x3f800000u)); + inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f800000u)); inst->predicate = BRW_PREDICATE_NORMAL; if (instr->dest.saturate) { inst = bld.MOV(result, result); @@ -615,9 +665,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) * -> non-negative val generates 0x00000000. * Predicated OR sets 1 if val is positive. */ - bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_G); - bld.ASR(result, op[0], fs_reg(31)); - inst = bld.OR(result, result, fs_reg(1)); + bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_G); + bld.ASR(result, op[0], brw_imm_d(31)); + inst = bld.OR(result, result, brw_imm_d(1)); inst->predicate = BRW_PREDICATE_NORMAL; break; @@ -665,21 +715,21 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_fddy: if (fs_key->high_quality_derivatives) { inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + brw_imm_d(fs_key->render_to_fbo)); } else { inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + brw_imm_d(fs_key->render_to_fbo)); } inst->saturate = instr->dest.saturate; break; case nir_op_fddy_fine: inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + brw_imm_d(fs_key->render_to_fbo)); inst->saturate = instr->dest.saturate; break; case nir_op_fddy_coarse: inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + brw_imm_d(fs_key->render_to_fbo)); inst->saturate = instr->dest.saturate; break; @@ -828,10 +878,10 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) break; case nir_op_f2b: - bld.CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ); + bld.CMP(result, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ); break; case nir_op_i2b: - bld.CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ); + bld.CMP(result, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ); break; case nir_op_ftrunc: @@ -931,9 +981,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then * subtract the result from 31 to convert the MSB count into an LSB count. */ - bld.CMP(bld.null_reg_d(), result, fs_reg(-1), BRW_CONDITIONAL_NZ); + bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ); - inst = bld.ADD(result, result, fs_reg(31)); + inst = bld.ADD(result, result, brw_imm_d(31)); inst->predicate = BRW_PREDICATE_NORMAL; inst->src[0].negate = true; break; @@ -986,7 +1036,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) if (optimize_frontfacing_ternary(instr, result)) return; - bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_NZ); + bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ); inst = bld.SEL(result, op[1], op[2]); inst->predicate = BRW_PREDICATE_NORMAL; break; @@ -1001,7 +1051,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) if (devinfo->gen <= 5 && (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == BRW_NIR_BOOLEAN_NEEDS_RESOLVE) { fs_reg masked = vgrf(glsl_type::int_type); - bld.AND(masked, result, fs_reg(1)); + bld.AND(masked, result, brw_imm_d(1)); masked.negate = true; bld.MOV(retype(result, BRW_REGISTER_TYPE_D), masked); } @@ -1014,7 +1064,7 @@ fs_visitor::nir_emit_load_const(const fs_builder &bld, fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components); for (unsigned i = 0; i < instr->def.num_components; i++) - bld.MOV(offset(reg, bld, i), fs_reg(instr->value.i[i])); + bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i[i])); nir_ssa_values[instr->def.index] = reg; } @@ -1042,7 +1092,7 @@ fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg, reg.reladdr = new(v->mem_ctx) fs_reg(v->vgrf(glsl_type::int_type)); v->bld.MUL(*reg.reladdr, v->get_nir_src(*indirect), - fs_reg(multiplier)); + brw_imm_d(multiplier)); } return reg; @@ -1108,12 +1158,12 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref) */ bld.emit_minmax(tmp, retype(get_nir_src(deref_array->indirect), BRW_REGISTER_TYPE_UD), - fs_reg(size - base - 1), BRW_CONDITIONAL_L); + brw_imm_ud(size - base - 1), BRW_CONDITIONAL_L); } else { bld.MOV(tmp, get_nir_src(deref_array->indirect)); } - bld.MUL(tmp, tmp, fs_reg(element_size)); + bld.MUL(tmp, tmp, brw_imm_ud(element_size)); if (image.reladdr) bld.ADD(*image.reladdr, *image.reladdr, tmp); else @@ -1232,7 +1282,7 @@ intexp2(const fs_builder &bld, const fs_reg &x) fs_reg result = bld.vgrf(x.type, 1); fs_reg one = bld.vgrf(x.type, 1); - bld.MOV(one, retype(fs_reg(1), one.type)); + bld.MOV(one, retype(brw_imm_d(1), one.type)); bld.SHL(result, one, x); return result; } @@ -1285,7 +1335,7 @@ fs_visitor::emit_gs_end_primitive(const nir_src &vertex_count_nir_src) /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */ fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - abld.ADD(prev_count, vertex_count, fs_reg(0xffffffffu)); + abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu)); fs_reg mask = intexp2(abld, prev_count); /* Note: we're relying on the fact that the GEN SHL instruction only pays * attention to the lower 5 bits of its second source argument, so on this @@ -1356,26 +1406,26 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count) if (opcode != SHADER_OPCODE_URB_WRITE_SIMD8) { fs_reg dword_index = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - abld.ADD(prev_count, vertex_count, fs_reg(0xffffffffu)); + abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu)); unsigned log2_bits_per_vertex = _mesa_fls(gs_compile->control_data_bits_per_vertex); - abld.SHR(dword_index, prev_count, fs_reg(6u - log2_bits_per_vertex)); + abld.SHR(dword_index, prev_count, brw_imm_ud(6u - log2_bits_per_vertex)); if (per_slot_offset.file != BAD_FILE) { /* Set the per-slot offset to dword_index / 4, so that we'll write to * the appropriate OWord within the control data header. */ - abld.SHR(per_slot_offset, dword_index, fs_reg(2u)); + abld.SHR(per_slot_offset, dword_index, brw_imm_ud(2u)); } /* Set the channel masks to 1 << (dword_index % 4), so that we'll * write to the appropriate DWORD within the OWORD. */ fs_reg channel = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - fwa_bld.AND(channel, dword_index, fs_reg(3u)); + fwa_bld.AND(channel, dword_index, brw_imm_ud(3u)); channel_mask = intexp2(fwa_bld, channel); /* Then the channel masks need to be in bits 23:16. */ - fwa_bld.SHL(channel_mask, channel_mask, fs_reg(16u)); + fwa_bld.SHL(channel_mask, channel_mask, brw_imm_ud(16u)); } /* Store the control data bits in the message payload and send it. */ @@ -1435,11 +1485,11 @@ fs_visitor::set_gs_stream_control_data_bits(const fs_reg &vertex_count, /* reg::sid = stream_id */ fs_reg sid = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - abld.MOV(sid, fs_reg(stream_id)); + abld.MOV(sid, brw_imm_ud(stream_id)); /* reg:shift_count = 2 * (vertex_count - 1) */ fs_reg shift_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - abld.SHL(shift_count, vertex_count, fs_reg(1u)); + abld.SHL(shift_count, vertex_count, brw_imm_ud(1u)); /* Note: we're relying on the fact that the GEN SHL instruction only pays * attention to the lower 5 bits of its second source argument, so on this @@ -1510,14 +1560,14 @@ fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src, */ fs_inst *inst = abld.AND(bld.null_reg_d(), vertex_count, - fs_reg(32u / gs_compile->control_data_bits_per_vertex - 1u)); + brw_imm_ud(32u / gs_compile->control_data_bits_per_vertex - 1u)); inst->conditional_mod = BRW_CONDITIONAL_Z; abld.IF(BRW_PREDICATE_NORMAL); /* If vertex_count is 0, then no control data bits have been * accumulated yet, so we can skip emitting them. */ - abld.CMP(bld.null_reg_d(), vertex_count, fs_reg(0u), + abld.CMP(bld.null_reg_d(), vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_NEQ); abld.IF(BRW_PREDICATE_NORMAL); emit_gs_control_data_bits(vertex_count); @@ -1530,7 +1580,7 @@ fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src, * effect of any call to EndPrimitive() that the shader may have * made before outputting its first vertex. */ - inst = abld.MOV(this->control_data_bits, fs_reg(0u)); + inst = abld.MOV(this->control_data_bits, brw_imm_ud(0u)); inst->force_writemask_all = true; abld.emit(BRW_OPCODE_ENDIF); } @@ -1551,42 +1601,113 @@ fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src, void fs_visitor::emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src, - unsigned input_offset, + const fs_reg &indirect_offset, + unsigned imm_offset, unsigned num_components) { - const brw_vue_prog_data *vue_prog_data = (const brw_vue_prog_data *) prog_data; - const unsigned vertex = nir_src_as_const_value(vertex_src)->u[0]; + struct brw_gs_prog_data *gs_prog_data = (struct brw_gs_prog_data *) prog_data; - const unsigned array_stride = vue_prog_data->urb_read_length * 8; + /* Offset 0 is the VUE header, which contains VARYING_SLOT_LAYER [.y], + * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w]. Only + * gl_PointSize is available as a GS input, however, so it must be that. + */ + const bool is_point_size = + indirect_offset.file == BAD_FILE && imm_offset == 0; + + nir_const_value *vertex_const = nir_src_as_const_value(vertex_src); + const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8; + + if (indirect_offset.file == BAD_FILE && vertex_const != NULL && + 4 * imm_offset < push_reg_count) { + imm_offset = 4 * imm_offset + vertex_const->u[0] * push_reg_count; + /* This input was pushed into registers. */ + if (is_point_size) { + /* gl_PointSize comes in .w */ + bld.MOV(dst, fs_reg(ATTR, imm_offset + 3, dst.type)); + } else { + for (unsigned i = 0; i < num_components; i++) { + bld.MOV(offset(dst, bld, i), + fs_reg(ATTR, imm_offset + i, dst.type)); + } + } + } else { + /* Resort to the pull model. Ensure the VUE handles are provided. */ + gs_prog_data->base.include_vue_handles = true; - const bool pushed = 4 * input_offset < array_stride; + unsigned first_icp_handle = gs_prog_data->include_primitive_id ? 3 : 2; + fs_reg icp_handle; - if (input_offset == 0) { - /* This is the VUE header, containing VARYING_SLOT_LAYER [.y], - * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w]. - * Only gl_PointSize is available as a GS input, so they must - * be asking for that input. - */ - if (pushed) { - bld.MOV(dst, fs_reg(ATTR, array_stride * vertex + 3, dst.type)); + if (vertex_const) { + /* The vertex index is constant; just select the proper URB handle. */ + icp_handle = + retype(brw_vec8_grf(first_icp_handle + vertex_const->i[0], 0), + BRW_REGISTER_TYPE_UD); } else { - fs_reg tmp = bld.vgrf(dst.type, 4); - fs_inst *inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, - fs_reg(vertex), fs_reg(0)); - inst->regs_written = 4; - bld.MOV(dst, offset(tmp, bld, 3)); + /* The vertex index is non-constant. We need to use indirect + * addressing to fetch the proper URB handle. + * + * First, we start with the sequence <7, 6, 5, 4, 3, 2, 1, 0> + * indicating that channel <n> should read the handle from + * DWord <n>. We convert that to bytes by multiplying by 4. + * + * Next, we convert the vertex index to bytes by multiplying + * by 32 (shifting by 5), and add the two together. This is + * the final indirect byte offset. + */ + fs_reg sequence = bld.vgrf(BRW_REGISTER_TYPE_W, 1); + fs_reg channel_offsets = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + fs_reg vertex_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + fs_reg icp_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + + /* sequence = <7, 6, 5, 4, 3, 2, 1, 0> */ + bld.MOV(sequence, fs_reg(brw_imm_v(0x76543210))); + /* channel_offsets = 4 * sequence = <28, 24, 20, 16, 12, 8, 4, 0> */ + bld.SHL(channel_offsets, sequence, brw_imm_ud(2u)); + /* Convert vertex_index to bytes (multiply by 32) */ + bld.SHL(vertex_offset_bytes, + retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD), + brw_imm_ud(5u)); + bld.ADD(icp_offset_bytes, vertex_offset_bytes, channel_offsets); + + /* Use first_icp_handle as the base offset. There is one register + * of URB handles per vertex, so inform the register allocator that + * we might read up to nir->info.gs.vertices_in registers. + */ + bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, + fs_reg(brw_vec8_grf(first_icp_handle, 0)), + fs_reg(icp_offset_bytes), + brw_imm_ud(nir->info.gs.vertices_in * REG_SIZE)); } - } else { - if (pushed) { - int index = vertex * array_stride + 4 * input_offset; - for (unsigned i = 0; i < num_components; i++) { - bld.MOV(offset(dst, bld, i), fs_reg(ATTR, index + i, dst.type)); - } + + fs_inst *inst; + if (indirect_offset.file == BAD_FILE) { + /* Constant indexing - use global offset. */ + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle); + inst->offset = imm_offset; + inst->base_mrf = -1; + inst->mlen = 1; + inst->regs_written = num_components; } else { - fs_inst *inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, - fs_reg(vertex), fs_reg(input_offset)); + /* Indirect indexing - use per-slot offsets as well. */ + const fs_reg srcs[] = { icp_handle, indirect_offset }; + fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); + bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); + + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, payload); + inst->offset = imm_offset; + inst->base_mrf = -1; + inst->mlen = 2; inst->regs_written = num_components; } + + if (is_point_size) { + /* Read the whole VUE header (because of alignment) and read .w. */ + fs_reg tmp = bld.vgrf(dst.type, 4); + inst->dst = tmp; + inst->regs_written = 4; + bld.MOV(dst, offset(tmp, bld, 3)); + } } } @@ -1626,6 +1747,7 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) { assert(stage == MESA_SHADER_GEOMETRY); + fs_reg indirect_offset; fs_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) @@ -1644,9 +1766,11 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld, unreachable("load_input intrinsics are invalid for the GS stage"); case nir_intrinsic_load_per_vertex_input_indirect: - assert(!"Not allowed"); + indirect_offset = retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_D); + /* fallthrough */ case nir_intrinsic_load_per_vertex_input: - emit_gs_input_load(dest, instr->src[0], instr->const_index[0], + emit_gs_input_load(dest, instr->src[0], + indirect_offset, instr->const_index[0], instr->num_components); break; @@ -1703,6 +1827,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, break; } + case nir_intrinsic_load_helper_invocation: case nir_intrinsic_load_sample_mask_in: case nir_intrinsic_load_sample_id: { gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); @@ -1723,7 +1848,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, fs_inst *cmp; if (instr->intrinsic == nir_intrinsic_discard_if) { cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]), - fs_reg(0), BRW_CONDITIONAL_Z); + brw_imm_d(0), BRW_CONDITIONAL_Z); } else { fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); @@ -1771,7 +1896,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, fs_reg(), /* src */ - fs_reg(0u), + brw_imm_ud(0u), interpolation); break; @@ -1785,7 +1910,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, fs_reg(), /* src */ - fs_reg(msg_data), + brw_imm_ud(msg_data), interpolation); } else { const fs_reg sample_src = retype(get_nir_src(instr->src[0]), @@ -1794,7 +1919,8 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, if (nir_src_is_dynamically_uniform(instr->src[0])) { const fs_reg sample_id = bld.emit_uniformize(sample_src); const fs_reg msg_data = vgrf(glsl_type::uint_type); - bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u)); + bld.exec_all().group(1, 0) + .SHL(msg_data, sample_id, brw_imm_ud(4u)); emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, @@ -1820,7 +1946,8 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, sample_src, sample_id, BRW_CONDITIONAL_EQ); const fs_reg msg_data = vgrf(glsl_type::uint_type); - bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u)); + bld.exec_all().group(1, 0) + .SHL(msg_data, sample_id, brw_imm_ud(4u)); fs_inst *inst = emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, @@ -1851,7 +1978,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, fs_reg(), /* src */ - fs_reg(off_x | (off_y << 4)), + brw_imm_ud(off_x | (off_y << 4)), interpolation); } else { fs_reg src = vgrf(glsl_type::ivec2_type); @@ -1859,7 +1986,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, BRW_REGISTER_TYPE_F); for (int i = 0; i < 2; i++) { fs_reg temp = vgrf(glsl_type::float_type); - bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f)); + bld.MUL(temp, offset(offset_src, bld, i), brw_imm_f(16.0f)); fs_reg itemp = vgrf(glsl_type::int_type); bld.MOV(itemp, temp); /* float to int */ @@ -1879,7 +2006,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, * FRAGMENT_INTERPOLATION_OFFSET_BITS" */ set_condmod(BRW_CONDITIONAL_L, - bld.SEL(offset(src, bld, i), itemp, fs_reg(7))); + bld.SEL(offset(src, bld, i), itemp, brw_imm_d(7))); } const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET; @@ -1887,7 +2014,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, opcode, dst_xy, src, - fs_reg(0u), + brw_imm_ud(0u), interpolation); } break; @@ -1947,14 +2074,14 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, cs_prog_data->uses_num_work_groups = true; - fs_reg surf_index = fs_reg(surface); + fs_reg surf_index = brw_imm_ud(surface); brw_mark_surface_used(prog_data, surface); /* Read the 3 GLuint components of gl_NumWorkGroups */ for (unsigned i = 0; i < 3; i++) { fs_reg read_result = emit_untyped_read(bld, surf_index, - fs_reg(i << 2), + brw_imm_ud(i << 2), 1 /* dims */, 1 /* size */, BRW_PREDICATE_NONE); read_result.type = dest.type; @@ -1994,16 +2121,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr /* Emit a surface read or atomic op. */ switch (instr->intrinsic) { case nir_intrinsic_atomic_counter_read: - tmp = emit_untyped_read(bld, fs_reg(surface), offset, 1, 1); + tmp = emit_untyped_read(bld, brw_imm_ud(surface), offset, 1, 1); break; case nir_intrinsic_atomic_counter_inc: - tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(), + tmp = emit_untyped_atomic(bld, brw_imm_ud(surface), offset, fs_reg(), fs_reg(), 1, 1, BRW_AOP_INC); break; case nir_intrinsic_atomic_counter_dec: - tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(), + tmp = emit_untyped_atomic(bld, brw_imm_ud(surface), offset, fs_reg(), fs_reg(), 1, 1, BRW_AOP_PREDEC); break; @@ -2145,14 +2272,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr for (unsigned c = 0; c < info->dest_components; ++c) { if ((int)c >= type->coordinate_components()) { bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), - fs_reg(1)); + brw_imm_d(1)); } else if (c == 1 && is_1d_array_image) { bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), offset(size, bld, 2)); } else if (c == 2 && is_cube_array_image) { bld.emit(SHADER_OPCODE_INT_QUOTIENT, offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), - offset(size, bld, c), fs_reg(6)); + offset(size, bld, c), brw_imm_d(6)); } else { bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), offset(size, bld, c)); @@ -2164,7 +2291,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_image_samples: /* The driver does not support multi-sampled images. */ - bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1)); + bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(1)); break; case nir_intrinsic_load_uniform_indirect: @@ -2195,7 +2322,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr if (const_index) { const unsigned index = stage_prog_data->binding_table.ubo_start + const_index->u[0]; - surf_index = fs_reg(index); + surf_index = brw_imm_ud(index); brw_mark_surface_used(prog_data, index); } else { /* The block index is not a constant. Evaluate the index expression @@ -2204,7 +2331,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr */ surf_index = vgrf(glsl_type::uint_type); bld.ADD(surf_index, get_nir_src(instr->src[0]), - fs_reg(stage_prog_data->binding_table.ubo_start)); + brw_imm_ud(stage_prog_data->binding_table.ubo_start)); surf_index = bld.emit_uniformize(surf_index); /* Assume this may touch any UBO. It would be nice to provide @@ -2220,7 +2347,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg base_offset = vgrf(glsl_type::int_type); bld.SHR(base_offset, retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_D), - fs_reg(2)); + brw_imm_d(2)); unsigned vec4_offset = instr->const_index[0] / 4; for (int i = 0; i < instr->num_components; i++) @@ -2230,7 +2357,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg packed_consts = vgrf(glsl_type::float_type); packed_consts.type = dest.type; - fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15); + struct brw_reg const_offset_reg = brw_imm_ud(instr->const_index[0] & ~15); bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts, surf_index, const_offset_reg); @@ -2262,12 +2389,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr if (const_uniform_block) { unsigned index = stage_prog_data->binding_table.ssbo_start + const_uniform_block->u[0]; - surf_index = fs_reg(index); + surf_index = brw_imm_ud(index); brw_mark_surface_used(prog_data, index); } else { surf_index = vgrf(glsl_type::uint_type); bld.ADD(surf_index, get_nir_src(instr->src[0]), - fs_reg(stage_prog_data->binding_table.ssbo_start)); + brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); /* Assume this may touch any UBO. It would be nice to provide * a tighter bound, but the array information is already lowered away. @@ -2282,7 +2409,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr if (has_indirect) { offset_reg = get_nir_src(instr->src[1]); } else { - offset_reg = fs_reg(instr->const_index[0]); + offset_reg = brw_imm_ud(instr->const_index[0]); } /* Read the vector */ @@ -2333,12 +2460,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr if (const_uniform_block) { unsigned index = stage_prog_data->binding_table.ssbo_start + const_uniform_block->u[0]; - surf_index = fs_reg(index); + surf_index = brw_imm_ud(index); brw_mark_surface_used(prog_data, index); } else { surf_index = vgrf(glsl_type::uint_type); bld.ADD(surf_index, get_nir_src(instr->src[1]), - fs_reg(stage_prog_data->binding_table.ssbo_start)); + brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); brw_mark_surface_used(prog_data, stage_prog_data->binding_table.ssbo_start + @@ -2362,12 +2489,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg offset_reg; if (!has_indirect) { - offset_reg = fs_reg(instr->const_index[0] + 4 * first_component); + offset_reg = brw_imm_ud(instr->const_index[0] + 4 * first_component); } else { offset_reg = vgrf(glsl_type::uint_type); bld.ADD(offset_reg, retype(get_nir_src(instr->src[2]), BRW_REGISTER_TYPE_UD), - fs_reg(4 * first_component)); + brw_imm_ud(4 * first_component)); } emit_untyped_write(bld, surf_index, offset_reg, @@ -2438,7 +2565,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr int reg_width = dispatch_width / 8; /* Set LOD = 0 */ - fs_reg source = fs_reg(0); + fs_reg source = brw_imm_d(0); int mlen = 1 * reg_width; @@ -2457,7 +2584,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr BRW_REGISTER_TYPE_UD); const unsigned index = prog_data->binding_table.ssbo_start + ssbo_index; fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, buffer_size, - src_payload, fs_reg(index)); + src_payload, brw_imm_ud(index)); inst->header_size = 0; inst->mlen = mlen; inst->regs_written = regs_written; @@ -2486,12 +2613,12 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, if (const_surface) { unsigned surf_index = stage_prog_data->binding_table.ssbo_start + const_surface->u[0]; - surface = fs_reg(surf_index); + surface = brw_imm_ud(surf_index); brw_mark_surface_used(prog_data, surf_index); } else { surface = vgrf(glsl_type::uint_type); bld.ADD(surface, get_nir_src(instr->src[0]), - fs_reg(stage_prog_data->binding_table.ssbo_start)); + brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); /* Assume this may touch any SSBO. This is the same we do for other * UBO/SSBO accesses with non-constant surface. @@ -2524,13 +2651,11 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) { unsigned texture = instr->texture_index; unsigned sampler = instr->sampler_index; - fs_reg texture_reg(texture); - fs_reg sampler_reg(sampler); + fs_reg texture_reg(brw_imm_ud(texture)); + fs_reg sampler_reg(brw_imm_ud(sampler)); int gather_component = instr->component; - bool is_rect = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT; - bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && instr->is_array; @@ -2552,6 +2677,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) switch (instr->op) { case nir_texop_txf: case nir_texop_txf_ms: + case nir_texop_samples_identical: coordinate = retype(src, BRW_REGISTER_TYPE_D); break; default: @@ -2604,7 +2730,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) /* Emit code to evaluate the actual indexing expression */ texture_reg = vgrf(glsl_type::uint_type); - bld.ADD(texture_reg, src, fs_reg(texture)); + bld.ADD(texture_reg, src, brw_imm_ud(texture)); texture_reg = bld.emit_uniformize(texture_reg); break; } @@ -2612,7 +2738,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) case nir_tex_src_sampler_offset: { /* Emit code to evaluate the actual indexing expression */ sampler_reg = vgrf(glsl_type::uint_type); - bld.ADD(sampler_reg, src, fs_reg(sampler)); + bld.ADD(sampler_reg, src, brw_imm_ud(sampler)); sampler_reg = bld.emit_uniformize(sampler_reg); break; } @@ -2622,19 +2748,20 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) } } - if (instr->op == nir_texop_txf_ms) { + if (instr->op == nir_texop_txf_ms || + instr->op == nir_texop_samples_identical) { if (devinfo->gen >= 7 && key_tex->compressed_multisample_layout_mask & (1 << texture)) { mcs = emit_mcs_fetch(coordinate, instr->coord_components, texture_reg); } else { - mcs = fs_reg(0u); + mcs = brw_imm_ud(0u); } } for (unsigned i = 0; i < 3; i++) { if (instr->const_offset[i] != 0) { assert(offset_components == 0); - tex_offset = fs_reg(brw_texture_offset(instr->const_offset, 3)); + tex_offset = brw_imm_ud(brw_texture_offset(instr->const_offset, 3)); break; } } @@ -2668,6 +2795,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) inst->base_mrf = -1; return; } + case nir_texop_samples_identical: op = ir_samples_identical; break; default: unreachable("unknown texture opcode"); } @@ -2675,8 +2803,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) emit_texture(op, dest_type, coordinate, instr->coord_components, shadow_comparitor, lod, lod2, lod_components, sample_index, tex_offset, mcs, gather_component, - is_cube_array, is_rect, - texture, texture_reg, sampler, sampler_reg); + is_cube_array, texture, texture_reg, sampler, sampler_reg); fs_reg dest = get_nir_dest(instr->dest); dest.type = this->result.type; diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp index 534d8490cdf..45694ec0894 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp @@ -44,7 +44,7 @@ namespace brw { */ const fs_reg usurface = bld.emit_uniformize(surface); const fs_reg srcs[] = { - addr, src, usurface, fs_reg(dims), fs_reg(arg) + addr, src, usurface, brw_imm_ud(dims), brw_imm_ud(arg) }; const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, rsize); fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs)); @@ -330,7 +330,7 @@ namespace { * messages causes a hang on IVB and VLV. */ set_predicate(pred, - bld.CMP(bld.null_reg_ud(), stride, fs_reg(4), + bld.CMP(bld.null_reg_ud(), stride, brw_imm_d(4), BRW_CONDITIONAL_G)); return BRW_PREDICATE_NORMAL; @@ -361,7 +361,7 @@ namespace { */ bld.CMP(bld.null_reg_ud(), retype(size, BRW_REGISTER_TYPE_UD), - fs_reg(0), BRW_CONDITIONAL_NZ); + brw_imm_d(0), BRW_CONDITIONAL_NZ); return BRW_PREDICATE_NORMAL; } else { @@ -438,7 +438,7 @@ namespace { * FINISHME: Factor out this frequently recurring pattern into a * helper function. */ - const fs_reg srcs[] = { addr, fs_reg(0), offset(addr, bld, 1) }; + const fs_reg srcs[] = { addr, brw_imm_d(0), offset(addr, bld, 1) }; const fs_reg dst = bld.vgrf(addr.type, dims); bld.LOAD_PAYLOAD(dst, srcs, dims, 0); return dst; @@ -488,7 +488,7 @@ namespace { bld.ADD(offset(addr, bld, c), offset(off, bld, c), (c < dims ? offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, c) : - fs_reg(0))); + fs_reg(brw_imm_d(0)))); /* The layout of 3-D textures in memory is sort-of like a tiling * format. At each miplevel, the slices are arranged in rows of @@ -515,7 +515,7 @@ namespace { /* Decompose z into a major (tmp.y) and a minor (tmp.x) * index. */ - bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2), fs_reg(0), + bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2), brw_imm_d(0), offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2)); bld.SHR(offset(tmp, bld, 1), offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2), @@ -549,7 +549,7 @@ namespace { for (unsigned c = 0; c < 2; ++c) { /* Calculate the minor x and y indices. */ bld.BFE(offset(minor, bld, c), offset(tile, bld, c), - fs_reg(0), offset(addr, bld, c)); + brw_imm_d(0), offset(addr, bld, c)); /* Calculate the major x and y indices. */ bld.SHR(offset(major, bld, c), @@ -595,7 +595,7 @@ namespace { /* XOR tmp.x and tmp.y with bit 6 of the memory address. */ bld.XOR(tmp, tmp, offset(tmp, bld, 1)); - bld.AND(tmp, tmp, fs_reg(1 << 6)); + bld.AND(tmp, tmp, brw_imm_d(1 << 6)); bld.XOR(dst, dst, tmp); } @@ -647,7 +647,7 @@ namespace { const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD); /* Shift each component left to the correct bitfield position. */ - bld.SHL(tmp, offset(src, bld, c), fs_reg(shifts[c] % 32)); + bld.SHL(tmp, offset(src, bld, c), brw_imm_ud(shifts[c] % 32)); /* Add everything up. */ if (seen[shifts[c] / 32]) { @@ -679,13 +679,13 @@ namespace { /* Shift left to discard the most significant bits. */ bld.SHL(offset(dst, bld, c), offset(src, bld, shifts[c] / 32), - fs_reg(32 - shifts[c] % 32 - widths[c])); + brw_imm_ud(32 - shifts[c] % 32 - widths[c])); /* Shift back to the least significant bits using an arithmetic * shift to get sign extension on signed types. */ bld.ASR(offset(dst, bld, c), - offset(dst, bld, c), fs_reg(32 - widths[c])); + offset(dst, bld, c), brw_imm_ud(32 - widths[c])); } } @@ -709,13 +709,13 @@ namespace { if (widths[c]) { /* Clamp to the maximum value. */ bld.emit_minmax(offset(dst, bld, c), offset(src, bld, c), - fs_reg((int)scale(widths[c] - s)), + brw_imm_d((int)scale(widths[c] - s)), BRW_CONDITIONAL_L); /* Clamp to the minimum value. */ if (is_signed) bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c), - fs_reg(-(int)scale(widths[c] - s) - 1), + brw_imm_d(-(int)scale(widths[c] - s) - 1), BRW_CONDITIONAL_GE); } } @@ -741,12 +741,12 @@ namespace { /* Divide by the normalization constants. */ bld.MUL(offset(dst, bld, c), offset(dst, bld, c), - fs_reg(1.0f / scale(widths[c] - s))); + brw_imm_f(1.0f / scale(widths[c] - s))); /* Clamp to the minimum value. */ if (is_signed) bld.emit_minmax(offset(dst, bld, c), - offset(dst, bld, c), fs_reg(-1.0f), + offset(dst, bld, c), brw_imm_f(-1.0f), BRW_CONDITIONAL_GE); } } @@ -771,10 +771,10 @@ namespace { /* Clamp the normalized floating-point argument. */ if (is_signed) { bld.emit_minmax(offset(fdst, bld, c), offset(src, bld, c), - fs_reg(-1.0f), BRW_CONDITIONAL_GE); + brw_imm_f(-1.0f), BRW_CONDITIONAL_GE); bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c), - fs_reg(1.0f), BRW_CONDITIONAL_L); + brw_imm_f(1.0f), BRW_CONDITIONAL_L); } else { set_saturate(true, bld.MOV(offset(fdst, bld, c), offset(src, bld, c))); @@ -782,7 +782,7 @@ namespace { /* Multiply by the normalization constants. */ bld.MUL(offset(fdst, bld, c), offset(fdst, bld, c), - fs_reg((float)scale(widths[c] - s))); + brw_imm_f((float)scale(widths[c] - s))); /* Convert to integer. */ bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c)); @@ -814,7 +814,7 @@ namespace { */ if (widths[c] < 16) bld.SHL(offset(dst, bld, c), - offset(dst, bld, c), fs_reg(15 - widths[c])); + offset(dst, bld, c), brw_imm_ud(15 - widths[c])); /* Convert to 32-bit floating point. */ bld.F16TO32(offset(fdst, bld, c), offset(dst, bld, c)); @@ -842,7 +842,7 @@ namespace { /* Clamp to the minimum value. */ if (widths[c] < 16) bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c), - fs_reg(0.0f), BRW_CONDITIONAL_GE); + brw_imm_f(0.0f), BRW_CONDITIONAL_GE); /* Convert to 16-bit floating-point. */ bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c)); @@ -855,7 +855,7 @@ namespace { */ if (widths[c] < 16) bld.SHR(offset(dst, bld, c), offset(dst, bld, c), - fs_reg(15 - widths[c])); + brw_imm_ud(15 - widths[c])); } } @@ -874,7 +874,8 @@ namespace { for (unsigned c = 0; c < 4; ++c) bld.MOV(offset(dst, bld, c), - widths[c] ? offset(src, bld, c) : fs_reg(pad[c])); + widths[c] ? offset(src, bld, c) + : fs_reg(brw_imm_ud(pad[c]))); return dst; } @@ -939,7 +940,7 @@ namespace brw { /* An out of bounds surface access should give zero as result. */ for (unsigned c = 0; c < size; ++c) set_predicate(pred, bld.SEL(offset(tmp, bld, c), - offset(tmp, bld, c), fs_reg(0))); + offset(tmp, bld, c), brw_imm_d(0))); } /* Set the register type to D instead of UD if the data type is @@ -1122,7 +1123,7 @@ namespace brw { /* An unbound surface access should give zero as result. */ if (rsize) - set_predicate(pred, bld.SEL(tmp, tmp, fs_reg(0))); + set_predicate(pred, bld.SEL(tmp, tmp, brw_imm_d(0))); return tmp; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 2647a40c730..e82acd141f3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -79,122 +79,6 @@ fs_visitor::emit_vs_system_value(int location) return reg; } -fs_reg -fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, - bool is_rect, uint32_t sampler) -{ - bool needs_gl_clamp = true; - fs_reg scale_x, scale_y; - - /* The 965 requires the EU to do the normalization of GL rectangle - * texture coordinates. We use the program parameter state - * tracking to get the scaling factor. - */ - if (is_rect && - (devinfo->gen < 6 || - (devinfo->gen >= 6 && (key_tex->gl_clamp_mask[0] & (1 << sampler) || - key_tex->gl_clamp_mask[1] & (1 << sampler))))) { - struct gl_program_parameter_list *params = prog->Parameters; - - - /* FINISHME: We're failing to recompile our programs when the sampler is - * updated. This only matters for the texture rectangle scale - * parameters (pre-gen6, or gen6+ with GL_CLAMP). - */ - int tokens[STATE_LENGTH] = { - STATE_INTERNAL, - STATE_TEXRECT_SCALE, - prog->SamplerUnits[sampler], - 0, - 0 - }; - - no16("rectangle scale uniform setup not supported on SIMD16\n"); - if (dispatch_width == 16) { - return coordinate; - } - - GLuint index = _mesa_add_state_reference(params, - (gl_state_index *)tokens); - /* Try to find existing copies of the texrect scale uniforms. */ - for (unsigned i = 0; i < uniforms; i++) { - if (stage_prog_data->param[i] == - &prog->Parameters->ParameterValues[index][0]) { - scale_x = fs_reg(UNIFORM, i); - scale_y = fs_reg(UNIFORM, i + 1); - break; - } - } - - /* If we didn't already set them up, do so now. */ - if (scale_x.file == BAD_FILE) { - scale_x = fs_reg(UNIFORM, uniforms); - scale_y = fs_reg(UNIFORM, uniforms + 1); - - stage_prog_data->param[uniforms++] = - &prog->Parameters->ParameterValues[index][0]; - stage_prog_data->param[uniforms++] = - &prog->Parameters->ParameterValues[index][1]; - } - } - - /* The 965 requires the EU to do the normalization of GL rectangle - * texture coordinates. We use the program parameter state - * tracking to get the scaling factor. - */ - if (devinfo->gen < 6 && is_rect) { - fs_reg dst = fs_reg(VGRF, alloc.allocate(coord_components)); - fs_reg src = coordinate; - coordinate = dst; - - bld.MUL(dst, src, scale_x); - dst = offset(dst, bld, 1); - src = offset(src, bld, 1); - bld.MUL(dst, src, scale_y); - } else if (is_rect) { - /* On gen6+, the sampler handles the rectangle coordinates - * natively, without needing rescaling. But that means we have - * to do GL_CLAMP clamping at the [0, width], [0, height] scale, - * not [0, 1] like the default case below. - */ - needs_gl_clamp = false; - - for (int i = 0; i < 2; i++) { - if (key_tex->gl_clamp_mask[i] & (1 << sampler)) { - fs_reg chan = coordinate; - chan = offset(chan, bld, i); - - set_condmod(BRW_CONDITIONAL_GE, - bld.emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f))); - - /* Our parameter comes in as 1.0/width or 1.0/height, - * because that's what people normally want for doing - * texture rectangle handling. We need width or height - * for clamping, but we don't care enough to make a new - * parameter type, so just invert back. - */ - fs_reg limit = vgrf(glsl_type::float_type); - bld.MOV(limit, i == 0 ? scale_x : scale_y); - bld.emit(SHADER_OPCODE_RCP, limit, limit); - - set_condmod(BRW_CONDITIONAL_L, - bld.emit(BRW_OPCODE_SEL, chan, chan, limit)); - } - } - } - - if (coord_components > 0 && needs_gl_clamp) { - for (int i = 0; i < MIN2(coord_components, 3); i++) { - if (key_tex->gl_clamp_mask[i] & (1 << sampler)) { - fs_reg chan = coordinate; - chan = offset(chan, bld, i); - set_saturate(true, bld.MOV(chan, chan)); - } - } - } - return coordinate; -} - /* Sample from the MCS surface attached to this multisample texture. */ fs_reg fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components, @@ -203,7 +87,7 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components, const fs_reg dest = vgrf(glsl_type::uvec4_type); const fs_reg srcs[] = { coordinate, fs_reg(), fs_reg(), fs_reg(), fs_reg(), fs_reg(), - texture, texture, fs_reg(), fs_reg(components), fs_reg(0) + texture, texture, fs_reg(), brw_imm_ud(components), brw_imm_d(0) }; fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs, ARRAY_SIZE(srcs)); @@ -227,7 +111,6 @@ fs_visitor::emit_texture(ir_texture_opcode op, fs_reg mcs, int gather_component, bool is_cube_array, - bool is_rect, uint32_t surface, fs_reg surface_reg, uint32_t sampler, @@ -235,38 +118,32 @@ fs_visitor::emit_texture(ir_texture_opcode op, { fs_inst *inst = NULL; - if (op == ir_tg4) { - /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother - * emitting anything other than setting up the constant result. - */ - int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component); - if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) { - - fs_reg res = vgrf(glsl_type::vec4_type); - this->result = res; - - for (int i=0; i<4; i++) { - bld.MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f)); - res = offset(res, bld, 1); - } - return; - } - } - if (op == ir_query_levels) { /* textureQueryLevels() is implemented in terms of TXS so we need to * pass a valid LOD argument. */ assert(lod.file == BAD_FILE); - lod = fs_reg(0u); + lod = brw_imm_ud(0u); } - if (coordinate.file != BAD_FILE) { - /* FINISHME: Texture coordinate rescaling doesn't work with non-constant - * samplers. This should only be a problem with GL_CLAMP on Gen7. + if (op == ir_samples_identical) { + fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 1, 1)); + + /* If mcs is an immediate value, it means there is no MCS. In that case + * just return false. */ - coordinate = rescale_texcoord(coordinate, coord_components, is_rect, - sampler); + if (mcs.file == BRW_IMMEDIATE_VALUE) { + bld.MOV(dst, brw_imm_ud(0u)); + } else if ((key_tex->msaa_16 & (1 << sampler))) { + fs_reg tmp = vgrf(glsl_type::uint_type); + bld.OR(tmp, mcs, offset(mcs, bld, 1)); + bld.CMP(dst, tmp, brw_imm_ud(0u), BRW_CONDITIONAL_EQ); + } else { + bld.CMP(dst, mcs, brw_imm_ud(0u), BRW_CONDITIONAL_EQ); + } + + this->result = dst; + return; } /* Writemasking doesn't eliminate channels on SIMD8 texture @@ -276,7 +153,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, const fs_reg srcs[] = { coordinate, shadow_c, lod, lod2, sample_index, mcs, surface_reg, sampler_reg, offset_value, - fs_reg(coord_components), fs_reg(grad_components) + brw_imm_d(coord_components), brw_imm_d(grad_components) }; enum opcode opcode; @@ -327,8 +204,15 @@ fs_visitor::emit_texture(ir_texture_opcode op, inst->offset = offset_value.ud; if (op == ir_tg4) { - inst->offset |= - gather_channel(gather_component, surface, sampler) << 16; /* M0.2:16-17 */ + if (gather_component == 1 && + key_tex->gather_channel_quirk_mask & (1 << surface)) { + /* gather4 sampler is broken for green channel on RG32F -- + * we must ask for blue instead. + */ + inst->offset |= 2 << 16; + } else { + inst->offset |= gather_component << 16; + } if (devinfo->gen == 6) emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], dst); @@ -338,7 +222,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, if (op == ir_txs && is_cube_array) { fs_reg depth = offset(dst, bld, 2); fs_reg fixed_depth = vgrf(glsl_type::int_type); - bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6)); + bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, brw_imm_d(6)); fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written); int components = inst->regs_written / (inst->exec_size / 8); @@ -352,7 +236,12 @@ fs_visitor::emit_texture(ir_texture_opcode op, bld.LOAD_PAYLOAD(dst, fixed_payload, components, 0); } - swizzle_result(op, dest_type->vector_elements, dst, sampler); + if (op == ir_query_levels) { + /* # levels is in .w */ + dst = offset(dst, bld, 3); + } + + this->result = dst; } /** @@ -369,7 +258,7 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst) for (int i = 0; i < 4; i++) { fs_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F); /* Convert from UNORM to UINT */ - bld.MUL(dst_f, dst_f, fs_reg((float)((1 << width) - 1))); + bld.MUL(dst_f, dst_f, brw_imm_f((1 << width) - 1)); bld.MOV(dst, dst_f); if (wa & WA_SIGN) { @@ -377,83 +266,14 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst) * shifting the sign bit into place, then shifting back * preserving sign. */ - bld.SHL(dst, dst, fs_reg(32 - width)); - bld.ASR(dst, dst, fs_reg(32 - width)); + bld.SHL(dst, dst, brw_imm_d(32 - width)); + bld.ASR(dst, dst, brw_imm_d(32 - width)); } dst = offset(dst, bld, 1); } } -/** - * Set up the gather channel based on the swizzle, for gather4. - */ -uint32_t -fs_visitor::gather_channel(int orig_chan, uint32_t surface, uint32_t sampler) -{ - int swiz = GET_SWZ(key_tex->swizzles[sampler], orig_chan); - switch (swiz) { - case SWIZZLE_X: return 0; - case SWIZZLE_Y: - /* gather4 sampler is broken for green channel on RG32F -- - * we must ask for blue instead. - */ - if (key_tex->gather_channel_quirk_mask & (1 << surface)) - return 2; - return 1; - case SWIZZLE_Z: return 2; - case SWIZZLE_W: return 3; - default: - unreachable("Not reached"); /* zero, one swizzles handled already */ - } -} - -/** - * Swizzle the result of a texture result. This is necessary for - * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons. - */ -void -fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components, - fs_reg orig_val, uint32_t sampler) -{ - if (op == ir_query_levels) { - /* # levels is in .w */ - this->result = offset(orig_val, bld, 3); - return; - } - - this->result = orig_val; - - /* txs,lod don't actually sample the texture, so swizzling the result - * makes no sense. - */ - if (op == ir_txs || op == ir_lod || op == ir_tg4) - return; - - if (dest_components == 1) { - /* Ignore DEPTH_TEXTURE_MODE swizzling. */ - } else if (key_tex->swizzles[sampler] != SWIZZLE_NOOP) { - fs_reg swizzled_result = vgrf(glsl_type::vec4_type); - swizzled_result.type = orig_val.type; - - for (int i = 0; i < 4; i++) { - int swiz = GET_SWZ(key_tex->swizzles[sampler], i); - fs_reg l = swizzled_result; - l = offset(l, bld, i); - - if (swiz == SWIZZLE_ZERO) { - bld.MOV(l, fs_reg(0.0f)); - } else if (swiz == SWIZZLE_ONE) { - bld.MOV(l, fs_reg(1.0f)); - } else { - bld.MOV(l, offset(orig_val, bld, - GET_SWZ(key_tex->swizzles[sampler], i))); - } - } - this->result = swizzled_result; - } -} - /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ void fs_visitor::emit_dummy_fs() @@ -464,7 +284,7 @@ fs_visitor::emit_dummy_fs() const float color[4] = { 1.0, 0.0, 1.0, 0.0 }; for (int i = 0; i < 4; i++) { bld.MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F), - fs_reg(color[i])); + brw_imm_f(color[i])); } fs_inst *write; @@ -683,7 +503,7 @@ fs_visitor::emit_alpha_test() fs_reg color = offset(outputs[0], bld, 3); /* f0.1 &= func(color, ref) */ - cmp = abld.CMP(bld.null_reg_f(), color, fs_reg(key->alpha_test_ref), + cmp = abld.CMP(bld.null_reg_f(), color, brw_imm_f(key->alpha_test_ref), cond_for_alpha_func(key->alpha_test_func)); } cmp->predicate = BRW_PREDICATE_NORMAL; @@ -716,7 +536,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, const fs_reg sources[] = { color0, color1, src0_alpha, src_depth, dst_depth, src_stencil, - sample_mask, fs_reg(components) + sample_mask, brw_imm_ud(components) }; assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS); fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(), @@ -950,12 +770,12 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) fs_reg offset; if (gs_vertex_count.file == IMM) { - per_slot_offsets = fs_reg(output_vertex_size_owords * - gs_vertex_count.ud); + per_slot_offsets = brw_imm_ud(output_vertex_size_owords * + gs_vertex_count.ud); } else { per_slot_offsets = vgrf(glsl_type::int_type); bld.MUL(per_slot_offsets, gs_vertex_count, - fs_reg(output_vertex_size_owords)); + brw_imm_ud(output_vertex_size_owords)); } } @@ -978,7 +798,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) } fs_reg zero(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); - bld.MOV(zero, fs_reg(0u)); + bld.MOV(zero, brw_imm_ud(0u)); sources[length++] = zero; if (vue_map->slots_valid & VARYING_BIT_LAYER) @@ -1038,7 +858,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) for (unsigned i = 0; i < output_components[varying]; i++) sources[length++] = offset(this->outputs[varying], bld, i); for (unsigned i = output_components[varying]; i < 4; i++) - sources[length++] = fs_reg(0); + sources[length++] = brw_imm_d(0); } break; } @@ -1115,11 +935,11 @@ fs_visitor::emit_barrier() const fs_builder pbld = bld.exec_all().group(8, 0); /* Clear the message payload */ - pbld.MOV(payload, fs_reg(0u)); + pbld.MOV(payload, brw_imm_ud(0u)); /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */ fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)); - pbld.AND(component(payload, 2), r0_2, fs_reg(0x0f000000u)); + pbld.AND(component(payload, 2), r0_2, brw_imm_ud(0x0f000000u)); /* Emit a gateway "barrier" message using the payload we set up, followed * by a wait instruction. diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index ed0890f430f..149b43ba055 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -75,7 +75,9 @@ brw_codegen_gs_prog(struct brw_context *brw, * every uniform is a float which gets padded to the size of a vec4. */ struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; - int param_count = gp->program.Base.nir->num_uniforms * 4; + int param_count = gp->program.Base.nir->num_uniforms; + if (!compiler->scalar_stage[MESA_SHADER_GEOMETRY]) + param_count *= 4; prog_data.base.base.param = rzalloc_array(NULL, const gl_constant_value *, param_count); @@ -87,7 +89,8 @@ brw_codegen_gs_prog(struct brw_context *brw, prog_data.base.base.nr_image_params = gs->NumImages; brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base, - &prog_data.base.base, compiler->scalar_gs); + &prog_data.base.base, + compiler->scalar_stage[MESA_SHADER_GEOMETRY]); GLbitfield64 outputs_written = gp->program.Base.OutputsWritten; diff --git a/src/mesa/drivers/dri/i965/brw_inst.h b/src/mesa/drivers/dri/i965/brw_inst.h index 4ed95c473cd..cd9f6ef591d 100644 --- a/src/mesa/drivers/dri/i965/brw_inst.h +++ b/src/mesa/drivers/dri/i965/brw_inst.h @@ -694,7 +694,7 @@ brw_inst_bits(const brw_inst *inst, unsigned high, unsigned low) high %= 64; low %= 64; - const uint64_t mask = (1ull << (high - low + 1)) - 1; + const uint64_t mask = (~0ull >> (64 - (high - low + 1))); return (inst->data[word] >> low) & mask; } @@ -713,7 +713,7 @@ brw_inst_set_bits(brw_inst *inst, unsigned high, unsigned low, uint64_t value) high %= 64; low %= 64; - const uint64_t mask = ((1ull << (high - low + 1)) - 1) << low; + const uint64_t mask = (~0ull >> (64 - (high - low + 1))) << low; /* Make sure the supplied value actually fits in the given bitfield. */ assert((value & (mask >> low)) == value); diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index 7e977e9e727..0410053ce27 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -36,11 +36,6 @@ public: void init(); fs_reg(); - explicit fs_reg(float f); - explicit fs_reg(int32_t i); - explicit fs_reg(uint32_t u); - explicit fs_reg(uint8_t vf[4]); - explicit fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3); fs_reg(struct brw_reg reg); fs_reg(enum brw_reg_file file, int nr); fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type); diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index 110e64b979e..e2e66044d3a 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -41,11 +41,6 @@ public: src_reg(enum brw_reg_file file, int nr, const glsl_type *type); src_reg(); - src_reg(float f); - src_reg(uint32_t u); - src_reg(int32_t i); - src_reg(uint8_t vf[4]); - src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3); src_reg(struct brw_reg reg); bool equals(const src_reg &r) const; diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index 29911732761..14421d421b6 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -66,12 +66,14 @@ brw_lower_packing_builtins(struct brw_context *brw, gl_shader_stage shader_type, exec_list *ir) { + const struct brw_compiler *compiler = brw->intelScreen->compiler; + int ops = LOWER_PACK_SNORM_2x16 | LOWER_UNPACK_SNORM_2x16 | LOWER_PACK_UNORM_2x16 | LOWER_UNPACK_UNORM_2x16; - if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) { + if (compiler->scalar_stage[shader_type]) { ops |= LOWER_UNPACK_UNORM_4x8 | LOWER_UNPACK_SNORM_4x8 | LOWER_PACK_UNORM_4x8 @@ -84,7 +86,7 @@ brw_lower_packing_builtins(struct brw_context *brw, * lowering is needed. For SOA code, the Half2x16 ops must be * scalarized. */ - if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) { + if (compiler->scalar_stage[shader_type]) { ops |= LOWER_PACK_HALF_2x16_TO_SPLIT | LOWER_UNPACK_HALF_2x16_TO_SPLIT; } @@ -103,6 +105,7 @@ process_glsl_ir(gl_shader_stage stage, struct gl_shader *shader) { struct gl_context *ctx = &brw->ctx; + const struct brw_compiler *compiler = brw->intelScreen->compiler; const struct gl_shader_compiler_options *options = &ctx->Const.ShaderCompilerOptions[shader->Stage]; @@ -161,7 +164,7 @@ process_glsl_ir(gl_shader_stage stage, do { progress = false; - if (is_scalar_shader_stage(brw->intelScreen->compiler, shader->Stage)) { + if (compiler->scalar_stage[shader->Stage]) { brw_do_channel_expressions(shader->ir); brw_do_vector_splitting(shader->ir); } @@ -252,7 +255,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) brw_add_texrect_params(prog); prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage, - is_scalar_shader_stage(compiler, stage)); + compiler->scalar_stage[stage]); _mesa_reference_program(ctx, &prog, NULL); } diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index 12e7c32e424..1f8bfdfa492 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -204,7 +204,7 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances) } static void -get_fast_clear_rect(struct gl_framebuffer *fb, +get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb, struct intel_renderbuffer *irb, struct rect *rect) { unsigned int x_align, y_align; @@ -228,7 +228,14 @@ get_fast_clear_rect(struct gl_framebuffer *fb, */ intel_get_non_msrt_mcs_alignment(irb->mt, &x_align, &y_align); x_align *= 16; - y_align *= 32; + + /* SKL+ line alignment requirement for Y-tiled are half those of the prior + * generations. + */ + if (brw->gen >= 9) + y_align *= 16; + else + y_align *= 32; /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render * Target(s)", beneath the "Fast Color Clear" bullet (p327): @@ -265,8 +272,10 @@ get_fast_clear_rect(struct gl_framebuffer *fb, * terms of (width,height) of the RT. * * MSAA Width of Clear Rect Height of Clear Rect + * 2X Ceil(1/8*width) Ceil(1/2*height) * 4X Ceil(1/8*width) Ceil(1/2*height) * 8X Ceil(1/2*width) Ceil(1/2*height) + * 16X width Ceil(1/2*height) * * The text "with upper left co-ordinate to coincide with actual * rectangle being cleared" is a little confusing--it seems to imply @@ -289,6 +298,9 @@ get_fast_clear_rect(struct gl_framebuffer *fb, case 8: x_scaledown = 2; break; + case 16: + x_scaledown = 1; + break; default: unreachable("Unexpected sample count for fast clear"); } @@ -347,8 +359,12 @@ is_color_fast_clear_compatible(struct brw_context *brw, } for (int i = 0; i < 4; i++) { - if (color->f[i] != 0.0f && color->f[i] != 1.0f && - _mesa_format_has_color_component(format, i)) { + if (!_mesa_format_has_color_component(format, i)) { + continue; + } + + if (brw->gen < 9 && + color->f[i] != 0.0f && color->f[i] != 1.0f) { return false; } } @@ -357,18 +373,55 @@ is_color_fast_clear_compatible(struct brw_context *brw, /** * Convert the given color to a bitfield suitable for ORing into DWORD 7 of - * SURFACE_STATE. + * SURFACE_STATE (DWORD 12-15 on SKL+). */ -static uint32_t -compute_fast_clear_color_bits(const union gl_color_union *color) +static void +set_fast_clear_color(struct brw_context *brw, + struct intel_mipmap_tree *mt, + const union gl_color_union *color) { - uint32_t bits = 0; - for (int i = 0; i < 4; i++) { - /* Testing for non-0 works for integer and float colors */ - if (color->f[i] != 0.0f) - bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i)); + union gl_color_union override_color = *color; + + /* The sampler doesn't look at the format of the surface when the fast + * clear color is used so we need to implement luminance, intensity and + * missing components manually. + */ + switch (_mesa_get_format_base_format(mt->format)) { + case GL_INTENSITY: + override_color.ui[3] = override_color.ui[0]; + /* flow through */ + case GL_LUMINANCE: + case GL_LUMINANCE_ALPHA: + override_color.ui[1] = override_color.ui[0]; + override_color.ui[2] = override_color.ui[0]; + break; + default: + for (int i = 0; i < 3; i++) { + if (!_mesa_format_has_color_component(mt->format, i)) + override_color.ui[i] = 0; + } + break; + } + + if (!_mesa_format_has_color_component(mt->format, 3)) { + if (_mesa_is_format_integer_color(mt->format)) + override_color.ui[3] = 1; + else + override_color.f[3] = 1.0f; + } + + if (brw->gen >= 9) { + mt->gen9_fast_clear_color = override_color; + } else { + mt->fast_clear_color_value = 0; + for (int i = 0; i < 4; i++) { + /* Testing for non-0 works for integer and float colors */ + if (override_color.f[i] != 0.0f) { + mt->fast_clear_color_value |= + 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i)); + } + } } - return bits; } static const uint32_t fast_clear_color[4] = { ~0, ~0, ~0, ~0 }; @@ -408,6 +461,55 @@ use_rectlist(struct brw_context *brw, bool enable) brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM; } +/** + * Individually fast clear each color buffer attachment. On previous gens this + * isn't required. The motivation for this comes from one line (which seems to + * be specific to SKL+). The list item is in section titled _MCS Buffer for + * Render Target(s)_ + * + * "Since only one RT is bound with a clear pass, only one RT can be cleared + * at a time. To clear multiple RTs, multiple clear passes are required." + * + * The code follows the same idea as the resolve code which creates a fake FBO + * to avoid interfering with too much of the GL state. + */ +static void +fast_clear_attachments(struct brw_context *brw, + struct gl_framebuffer *fb, + uint32_t fast_clear_buffers, + struct rect fast_clear_rect) +{ + assert(brw->gen >= 9); + struct gl_context *ctx = &brw->ctx; + + brw_bind_rep_write_shader(brw, (float *) fast_clear_color); + + /* SKL+ also has a resolve mode for compressed render targets and thus more + * bits to let us select the type of resolve. For fast clear resolves, it + * turns out we can use the same value as pre-SKL though. + */ + set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE); + + while (fast_clear_buffers) { + int index = ffs(fast_clear_buffers) - 1; + + fast_clear_buffers &= ~(1 << index); + + _mesa_meta_drawbuffers_from_bitfield(1 << index); + + brw_draw_rectlist(ctx, &fast_clear_rect, MAX2(1, fb->MaxNumLayers)); + + /* Now set the mcs we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll + * resolve them eventually. + */ + struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[0]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR; + } + + set_fast_clear_op(brw, 0); +} + bool brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, GLbitfield buffers, bool partial_clear) @@ -447,13 +549,15 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, if (brw->gen < 7) clear_type = REP_CLEAR; - if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS) + /* Certain formats have unresolved issues with sampling from the MCS + * buffer on Gen9. This disables fast clears altogether for MSRTs until + * we can figure out what's going on. + */ + if (brw->gen >= 9 && irb->mt->num_samples > 1) clear_type = REP_CLEAR; - if (brw->gen >= 9 && clear_type == FAST_CLEAR) { - perf_debug("fast MCS clears are disabled on gen9"); + if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS) clear_type = REP_CLEAR; - } /* We can't do scissored fast clears because of the restrictions on the * fast clear rectangle size. @@ -503,8 +607,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, switch (clear_type) { case FAST_CLEAR: - irb->mt->fast_clear_color_value = - compute_fast_clear_color_bits(&ctx->Color.ClearColor); + set_fast_clear_color(brw, irb->mt, &ctx->Color.ClearColor); irb->need_downsample = true; /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the @@ -520,7 +623,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; irb->need_downsample = true; fast_clear_buffers |= 1 << index; - get_fast_clear_rect(fb, irb, &fast_clear_rect); + get_fast_clear_rect(brw, fb, irb, &fast_clear_rect); break; case REP_CLEAR: @@ -584,12 +687,27 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, use_rectlist(brw, true); layers = MAX2(1, fb->MaxNumLayers); - if (fast_clear_buffers) { + + if (brw->gen >= 9 && fast_clear_buffers) { + fast_clear_attachments(brw, fb, fast_clear_buffers, fast_clear_rect); + } else if (fast_clear_buffers) { _mesa_meta_drawbuffers_from_bitfield(fast_clear_buffers); brw_bind_rep_write_shader(brw, (float *) fast_clear_color); set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE); brw_draw_rectlist(ctx, &fast_clear_rect, layers); set_fast_clear_op(brw, 0); + + /* Now set the mcs we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll + * resolve them eventually. + */ + for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) { + struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + int index = fb->_ColorDrawBufferIndexes[buf]; + + if ((1 << index) & fast_clear_buffers) + irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR; + } } if (rep_clear_buffers) { @@ -598,18 +716,6 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, brw_draw_rectlist(ctx, &clear_rect, layers); } - /* Now set the mts we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll - * resolve them eventually. - */ - for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) { - struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf]; - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - int index = fb->_ColorDrawBufferIndexes[buf]; - - if ((1 << index) & fast_clear_buffers) - irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR; - } - bail_to_meta: /* Dirty _NEW_BUFFERS so we reemit SURFACE_STATE which sets the fast clear * color before resolve and sets irb->mt->fast_clear_state to UNRESOLVED if @@ -655,8 +761,9 @@ get_resolve_rect(struct brw_context *brw, * * The scaledown factors in the table that follows are related to the * alignment size returned by intel_get_non_msrt_mcs_alignment() by a - * multiplier. For IVB and HSW, we divide by two, for BDW we multiply - * by 8 and 16 and 8 and 8 for SKL. + * multiplier. For IVB and HSW, we divide by two, for BDW we multiply + * by 8 and 16. Similar to the fast clear, SKL eases the BDW vertical scaling + * by a factor of 2. */ intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align); @@ -702,6 +809,10 @@ brw_meta_resolve_color(struct brw_context *brw, brw_bind_rep_write_shader(brw, (float *) fast_clear_color); + /* SKL+ also has a resolve mode for compressed render targets and thus more + * bits to let us select the type of resolve. For fast clear resolves, it + * turns out we can use the same value as pre-SKL though. + */ set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE); mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 58754adc887..91358d8f389 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -56,8 +56,9 @@ remap_vs_attrs(nir_block *block, void *closure) } static void -brw_nir_lower_inputs(const struct brw_device_info *devinfo, - nir_shader *nir, bool is_scalar) +brw_nir_lower_inputs(nir_shader *nir, + const struct brw_device_info *devinfo, + bool is_scalar) { switch (nir->stage) { case MESA_SHADER_VERTEX: @@ -170,131 +171,159 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar) } } -static void +static bool +should_clone_nir() +{ + static int should_clone = -1; + if (should_clone < 1) + should_clone = brw_env_var_as_boolean("NIR_TEST_CLONE", false); + + return should_clone; +} + +#define _OPT(do_pass) (({ \ + bool this_progress = true; \ + do_pass \ + nir_validate_shader(nir); \ + if (should_clone_nir()) { \ + nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \ + ralloc_free(nir); \ + nir = clone; \ + } \ + this_progress; \ +})) + +#define OPT(pass, ...) _OPT( \ + nir_metadata_set_validation_flag(nir); \ + this_progress = pass(nir ,##__VA_ARGS__); \ + if (this_progress) { \ + progress = true; \ + nir_metadata_check_validation_flag(nir); \ + } \ +) + +#define OPT_V(pass, ...) _OPT( \ + pass(nir, ##__VA_ARGS__); \ +) + +static nir_shader * nir_optimize(nir_shader *nir, bool is_scalar) { bool progress; do { progress = false; - nir_lower_vars_to_ssa(nir); - nir_validate_shader(nir); + OPT_V(nir_lower_vars_to_ssa); if (is_scalar) { - nir_lower_alu_to_scalar(nir); - nir_validate_shader(nir); + OPT_V(nir_lower_alu_to_scalar); } - progress |= nir_copy_prop(nir); - nir_validate_shader(nir); + OPT(nir_copy_prop); if (is_scalar) { - nir_lower_phis_to_scalar(nir); - nir_validate_shader(nir); + OPT_V(nir_lower_phis_to_scalar); } - progress |= nir_copy_prop(nir); - nir_validate_shader(nir); - progress |= nir_opt_dce(nir); - nir_validate_shader(nir); - progress |= nir_opt_cse(nir); - nir_validate_shader(nir); - progress |= nir_opt_peephole_select(nir); - nir_validate_shader(nir); - progress |= nir_opt_algebraic(nir); - nir_validate_shader(nir); - progress |= nir_opt_constant_folding(nir); - nir_validate_shader(nir); - progress |= nir_opt_dead_cf(nir); - nir_validate_shader(nir); - progress |= nir_opt_remove_phis(nir); - nir_validate_shader(nir); - progress |= nir_opt_undef(nir); - nir_validate_shader(nir); + OPT(nir_copy_prop); + OPT(nir_opt_dce); + OPT(nir_opt_cse); + OPT(nir_opt_peephole_select); + OPT(nir_opt_algebraic); + OPT(nir_opt_constant_folding); + OPT(nir_opt_dead_cf); + OPT(nir_opt_remove_phis); + OPT(nir_opt_undef); } while (progress); + + return nir; } +/* Does some simple lowering and runs the standard suite of optimizations + * + * This is intended to be called more-or-less directly after you get the + * shader out of GLSL or some other source. While it is geared towards i965, + * it is not at all generator-specific except for the is_scalar flag. Even + * there, it is safe to call with is_scalar = false for a shader that is + * intended for the FS backend as long as nir_optimize is called again with + * is_scalar = true to scalarize everything prior to code gen. + */ nir_shader * -brw_create_nir(struct brw_context *brw, - const struct gl_shader_program *shader_prog, - const struct gl_program *prog, - gl_shader_stage stage, - bool is_scalar) +brw_preprocess_nir(nir_shader *nir, bool is_scalar) { - struct gl_context *ctx = &brw->ctx; - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; - const nir_shader_compiler_options *options = - ctx->Const.ShaderCompilerOptions[stage].NirOptions; - nir_shader *nir; + bool progress; /* Written by OPT and OPT_V */ + (void)progress; - /* First, lower the GLSL IR or Mesa IR to NIR */ - if (shader_prog) { - nir = glsl_to_nir(shader_prog, stage, options); - } else { - nir = prog_to_nir(prog, options); - nir_convert_to_ssa(nir); /* turn registers into SSA */ - } - nir_validate_shader(nir); + if (nir->stage == MESA_SHADER_GEOMETRY) + OPT(nir_lower_gs_intrinsics); - brw_preprocess_nir(nir, brw->intelScreen->devinfo, is_scalar); + static const nir_lower_tex_options tex_options = { + .lower_txp = ~0, + }; - if (shader_prog) { - nir_lower_samplers(nir, shader_prog); - nir_validate_shader(nir); + OPT(nir_lower_tex, &tex_options); + OPT(nir_normalize_cubemap_coords); - nir_lower_atomics(nir, shader_prog); - nir_validate_shader(nir); - } + OPT(nir_lower_global_vars_to_local); - brw_postprocess_nir(nir, brw->intelScreen->devinfo, is_scalar); + OPT(nir_split_var_copies); - static GLuint msg_id = 0; - _mesa_gl_debug(&brw->ctx, &msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_OTHER, - MESA_DEBUG_SEVERITY_NOTIFICATION, - "%s NIR shader:\n", - _mesa_shader_stage_to_abbrev(nir->stage)); + nir = nir_optimize(nir, is_scalar); - return nir; -} + /* Lower a bunch of stuff */ + OPT_V(nir_lower_var_copies); -void -brw_preprocess_nir(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar) -{ - static const nir_lower_tex_options tex_options = { - .lower_txp = ~0, - }; + /* Get rid of split copies */ + nir = nir_optimize(nir, is_scalar); - if (nir->stage == MESA_SHADER_GEOMETRY) { - nir_lower_gs_intrinsics(nir); - nir_validate_shader(nir); - } + OPT(nir_remove_dead_variables); - nir_lower_global_vars_to_local(nir); - nir_validate_shader(nir); + return nir; +} - nir_lower_tex(nir, &tex_options); - nir_validate_shader(nir); +/* Lowers inputs, outputs, uniforms, and samplers for i965 + * + * This function does all of the standard lowering prior to post-processing. + * The lowering done is highly gen, stage, and backend-specific. The + * shader_prog parameter is optional and is used only for lowering sampler + * derefs and atomics for GLSL shaders. + */ +nir_shader * +brw_lower_nir(nir_shader *nir, + const struct brw_device_info *devinfo, + const struct gl_shader_program *shader_prog, + bool is_scalar) +{ + bool progress; /* Written by OPT and OPT_V */ + (void)progress; - nir_normalize_cubemap_coords(nir); - nir_validate_shader(nir); + OPT_V(brw_nir_lower_inputs, devinfo, is_scalar); + OPT_V(brw_nir_lower_outputs, is_scalar); + nir_assign_var_locations(&nir->uniforms, + &nir->num_uniforms, + is_scalar ? type_size_scalar : type_size_vec4); + OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4); - nir_split_var_copies(nir); - nir_validate_shader(nir); + if (shader_prog) { + OPT_V(nir_lower_samplers, shader_prog); + } - nir_optimize(nir, is_scalar); + OPT(nir_lower_system_values); - /* Lower a bunch of stuff */ - nir_lower_var_copies(nir); - nir_validate_shader(nir); + if (shader_prog) { + OPT_V(nir_lower_atomics, shader_prog); + } - /* Get rid of split copies */ - nir_optimize(nir, is_scalar); + return nir_optimize(nir, is_scalar); } -void +/* Prepare the given shader for codegen + * + * This function is intended to be called right before going into the actual + * backend and is highly backend-specific. Also, once this function has been + * called on a shader, it will no longer be in SSA form so most optimizations + * will not work. + */ +nir_shader * brw_postprocess_nir(nir_shader *nir, const struct brw_device_info *devinfo, bool is_scalar) @@ -302,40 +331,21 @@ brw_postprocess_nir(nir_shader *nir, bool debug_enabled = (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage)); - brw_nir_lower_inputs(devinfo, nir, is_scalar); - brw_nir_lower_outputs(nir, is_scalar); - nir_assign_var_locations(&nir->uniforms, - &nir->num_uniforms, - is_scalar ? type_size_scalar : type_size_vec4); - nir_lower_io(nir, -1, is_scalar ? type_size_scalar : type_size_vec4); - nir_validate_shader(nir); - - nir_remove_dead_variables(nir); - nir_validate_shader(nir); - - nir_lower_system_values(nir); - nir_validate_shader(nir); - - nir_optimize(nir, is_scalar); + bool progress; /* Written by OPT and OPT_V */ + (void)progress; if (devinfo->gen >= 6) { /* Try and fuse multiply-adds */ - brw_nir_opt_peephole_ffma(nir); - nir_validate_shader(nir); + OPT(brw_nir_opt_peephole_ffma); } - nir_opt_algebraic_late(nir); - nir_validate_shader(nir); + OPT(nir_opt_algebraic_late); - nir_lower_locals_to_regs(nir); - nir_validate_shader(nir); + OPT(nir_lower_locals_to_regs); - nir_lower_to_source_mods(nir); - nir_validate_shader(nir); - nir_copy_prop(nir); - nir_validate_shader(nir); - nir_opt_dce(nir); - nir_validate_shader(nir); + OPT_V(nir_lower_to_source_mods); + OPT(nir_copy_prop); + OPT(nir_opt_dce); if (unlikely(debug_enabled)) { /* Re-index SSA defs so we print more sensible numbers. */ @@ -349,15 +359,11 @@ brw_postprocess_nir(nir_shader *nir, nir_print_shader(nir, stderr); } - nir_convert_from_ssa(nir, true); - nir_validate_shader(nir); + OPT_V(nir_convert_from_ssa, true); if (!is_scalar) { - nir_move_vec_src_uses_to_dest(nir); - nir_validate_shader(nir); - - nir_lower_vec_to_movs(nir); - nir_validate_shader(nir); + OPT_V(nir_move_vec_src_uses_to_dest); + OPT(nir_lower_vec_to_movs); } /* This is the last pass we run before we start emitting stuff. It @@ -375,13 +381,83 @@ brw_postprocess_nir(nir_shader *nir, _mesa_shader_stage_to_string(nir->stage)); nir_print_shader(nir, stderr); } + + return nir; +} + +nir_shader * +brw_create_nir(struct brw_context *brw, + const struct gl_shader_program *shader_prog, + const struct gl_program *prog, + gl_shader_stage stage, + bool is_scalar) +{ + struct gl_context *ctx = &brw->ctx; + const struct brw_device_info *devinfo = brw->intelScreen->devinfo; + const nir_shader_compiler_options *options = + ctx->Const.ShaderCompilerOptions[stage].NirOptions; + bool progress; + nir_shader *nir; + + /* First, lower the GLSL IR or Mesa IR to NIR */ + if (shader_prog) { + nir = glsl_to_nir(shader_prog, stage, options); + } else { + nir = prog_to_nir(prog, options); + OPT_V(nir_convert_to_ssa); /* turn registers into SSA */ + } + nir_validate_shader(nir); + + (void)progress; + + nir = brw_preprocess_nir(nir, is_scalar); + nir = brw_lower_nir(nir, devinfo, shader_prog, is_scalar); + + return nir; +} + +nir_shader * +brw_nir_apply_sampler_key(nir_shader *nir, + const struct brw_device_info *devinfo, + const struct brw_sampler_prog_key_data *key_tex, + bool is_scalar) +{ + nir_lower_tex_options tex_options = { 0 }; + + /* Iron Lake and prior require lowering of all rectangle textures */ + if (devinfo->gen < 6) + tex_options.lower_rect = true; + + /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */ + if (devinfo->gen < 8) { + tex_options.saturate_s = key_tex->gl_clamp_mask[0]; + tex_options.saturate_t = key_tex->gl_clamp_mask[1]; + tex_options.saturate_r = key_tex->gl_clamp_mask[2]; + } + + /* Prior to Haswell, we have to fake texture swizzle */ + for (unsigned s = 0; s < MAX_SAMPLERS; s++) { + if (key_tex->swizzles[s] == SWIZZLE_NOOP) + continue; + + tex_options.swizzle_result |= (1 << s); + for (unsigned c = 0; c < 4; c++) + tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c); + } + + if (nir_lower_tex(nir, &tex_options)) { + nir_validate_shader(nir); + nir = nir_optimize(nir, is_scalar); + } + + return nir; } enum brw_reg_type brw_type_for_nir_type(nir_alu_type type) { switch (type) { - case nir_type_unsigned: + case nir_type_uint: return BRW_REGISTER_TYPE_UD; case nir_type_bool: case nir_type_int: @@ -408,7 +484,7 @@ brw_glsl_base_type_for_nir_type(nir_alu_type type) case nir_type_int: return GLSL_TYPE_INT; - case nir_type_unsigned: + case nir_type_uint: return GLSL_TYPE_UINT; default: diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index d259777e1c9..0a8a5a280b1 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -81,19 +81,25 @@ nir_shader *brw_create_nir(struct brw_context *brw, gl_shader_stage stage, bool is_scalar); +nir_shader *brw_preprocess_nir(nir_shader *nir, bool is_scalar); +nir_shader *brw_lower_nir(nir_shader *nir, + const struct brw_device_info *devinfo, + const struct gl_shader_program *shader_prog, + bool is_scalar); +nir_shader *brw_postprocess_nir(nir_shader *nir, + const struct brw_device_info *devinfo, + bool is_scalar); + + +nir_shader *brw_nir_apply_sampler_key(nir_shader *nir, + const struct brw_device_info *devinfo, + const struct brw_sampler_prog_key_data *key, + bool is_scalar); + enum brw_reg_type brw_type_for_nir_type(nir_alu_type type); enum glsl_base_type brw_glsl_base_type_for_nir_type(nir_alu_type type); -void -brw_preprocess_nir(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar); -void -brw_postprocess_nir(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar); - void brw_nir_setup_glsl_uniforms(nir_shader *shader, struct gl_shader_program *shader_prog, const struct gl_program *prog, diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 6433dec9041..3da8e9e8a97 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -126,6 +126,7 @@ brwProgramStringNotify(struct gl_context *ctx, struct gl_program *prog) { struct brw_context *brw = brw_context(ctx); + const struct brw_compiler *compiler = brw->intelScreen->compiler; switch (target) { case GL_FRAGMENT_PROGRAM_ARB: { @@ -165,7 +166,7 @@ brwProgramStringNotify(struct gl_context *ctx, brw_add_texrect_params(prog); prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX, - brw->intelScreen->compiler->scalar_vs); + compiler->scalar_stage[MESA_SHADER_VERTEX]); brw_vs_precompile(ctx, NULL, prog); break; @@ -343,6 +344,8 @@ brw_report_shader_time(struct brw_context *brw) switch (type) { case ST_VS: + case ST_TCS: + case ST_TES: case ST_GS: case ST_FS8: case ST_FS16: @@ -369,6 +372,8 @@ brw_report_shader_time(struct brw_context *brw) switch (type) { case ST_VS: + case ST_TCS: + case ST_TES: case ST_GS: case ST_FS8: case ST_FS16: @@ -406,6 +411,12 @@ brw_report_shader_time(struct brw_context *brw) case ST_VS: stage = "vs"; break; + case ST_TCS: + stage = "tcs"; + break; + case ST_TES: + stage = "tes"; + break; case ST_GS: stage = "gs"; break; @@ -429,6 +440,8 @@ brw_report_shader_time(struct brw_context *brw) fprintf(stderr, "\n"); print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total); + print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total); + print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total); print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total); print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total); print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total); diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index 3da83b43b5d..fa912c96c36 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -43,7 +43,6 @@ #define BRW_REG_H #include <stdbool.h> -#include "main/imports.h" #include "main/compiler.h" #include "main/macros.h" #include "program/prog_instruction.h" @@ -619,57 +618,37 @@ static inline struct brw_reg brw_imm_v(unsigned v) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); - imm.vstride = BRW_VERTICAL_STRIDE_0; - imm.width = BRW_WIDTH_8; - imm.hstride = BRW_HORIZONTAL_STRIDE_1; imm.ud = v; return imm; } +/** Construct vector of eight unsigned half-byte values */ +static inline struct brw_reg +brw_imm_uv(unsigned uv) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UV); + imm.ud = uv; + return imm; +} + /** Construct vector of four 8-bit float values */ static inline struct brw_reg brw_imm_vf(unsigned v) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); - imm.vstride = BRW_VERTICAL_STRIDE_0; - imm.width = BRW_WIDTH_4; - imm.hstride = BRW_HORIZONTAL_STRIDE_1; imm.ud = v; return imm; } -/** - * Convert an integer into a "restricted" 8-bit float, used in vector - * immediates. The 8-bit floating point format has a sign bit, an - * excess-3 3-bit exponent, and a 4-bit mantissa. All integer values - * from -31 to 31 can be represented exactly. - */ -static inline uint8_t -int_to_float8(int x) -{ - if (x == 0) { - return 0; - } else if (x < 0) { - return 1 << 7 | int_to_float8(-x); - } else { - const unsigned exponent = _mesa_logbase2(x); - const unsigned mantissa = (x - (1 << exponent)) << (4 - exponent); - assert(exponent <= 4); - return (exponent + 3) << 4 | mantissa; - } -} - -/** - * Construct a floating-point packed vector immediate from its integer - * values. \sa int_to_float8() - */ static inline struct brw_reg -brw_imm_vf4(int v0, int v1, int v2, int v3) +brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) { - return brw_imm_vf((int_to_float8(v0) << 0) | - (int_to_float8(v1) << 8) | - (int_to_float8(v2) << 16) | - (int_to_float8(v3) << 24)); + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24)); + return imm; } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 1f3ae7ab5e6..2f0e8b680ab 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -72,22 +72,6 @@ shader_perf_log_mesa(void *data, const char *fmt, ...) va_end(args); } -bool -is_scalar_shader_stage(const struct brw_compiler *compiler, int stage) -{ - switch (stage) { - case MESA_SHADER_FRAGMENT: - case MESA_SHADER_COMPUTE: - return true; - case MESA_SHADER_GEOMETRY: - return compiler->scalar_gs; - case MESA_SHADER_VERTEX: - return compiler->scalar_vs; - default: - return false; - } -} - struct brw_compiler * brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) { @@ -100,11 +84,12 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) brw_fs_alloc_reg_sets(compiler); brw_vec4_alloc_reg_set(compiler); - if (devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS)) - compiler->scalar_vs = true; - - if (devinfo->gen >= 8 && brw_env_var_as_boolean("INTEL_SCALAR_GS", false)) - compiler->scalar_gs = true; + compiler->scalar_stage[MESA_SHADER_VERTEX] = + devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS); + compiler->scalar_stage[MESA_SHADER_GEOMETRY] = + devinfo->gen >= 8 && brw_env_var_as_boolean("INTEL_SCALAR_GS", false); + compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true; + compiler->scalar_stage[MESA_SHADER_COMPUTE] = true; nir_shader_compiler_options *nir_options = rzalloc(compiler, nir_shader_compiler_options); @@ -139,7 +124,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false; compiler->glsl_compiler_options[i].LowerClipDistance = true; - bool is_scalar = is_scalar_shader_stage(compiler, i); + bool is_scalar = compiler->scalar_stage[i]; compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar; compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar; @@ -154,6 +139,9 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true; } + if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) + compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false; + return compiler; } @@ -557,6 +545,8 @@ brw_instruction_name(enum opcode op) return "barrier"; case SHADER_OPCODE_MULH: return "mulh"; + case SHADER_OPCODE_MOV_INDIRECT: + return "mov_indirect"; } unreachable("not reached"); @@ -574,16 +564,12 @@ brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg) switch (type) { case BRW_REGISTER_TYPE_UD: case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: case BRW_REGISTER_TYPE_UQ: case BRW_REGISTER_TYPE_Q: /* Nothing to do. */ return false; - case BRW_REGISTER_TYPE_UW: - sat_imm.ud = CLAMP(imm.ud, 0, USHRT_MAX); - break; - case BRW_REGISTER_TYPE_W: - sat_imm.d = CLAMP(imm.d, SHRT_MIN, SHRT_MAX); - break; case BRW_REGISTER_TYPE_F: sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f); break; diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index c4a37187ce2..9555406c777 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -252,8 +252,6 @@ int type_size_scalar(const struct glsl_type *type); int type_size_vec4(const struct glsl_type *type); int type_size_vec4_times_4(const struct glsl_type *type); -bool is_scalar_shader_stage(const struct brw_compiler *compiler, int stage); - #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 0d49ab7b431..69eed4bc629 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -32,8 +32,8 @@ /* This macro allows us to write the table almost as it appears in the PRM, * while restructuring it to turn it into the C code we want. */ -#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, sf) \ - [BRW_SURFACEFORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, #sf}, +#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, ccs_e, sf) \ + [BRW_SURFACEFORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, ccs_e, #sf}, #define Y 0 #define x 999 @@ -61,6 +61,7 @@ * VB - Input Vertex Buffer * SO - Steamed Output Vertex Buffers (transform feedback) * color - Color Processing + * ccs_e - Lossless Compression Support (gen9+ only) * sf - Surface Format * * See page 88 of the Sandybridge PRM VOL4_Part1 PDF. @@ -71,257 +72,258 @@ * - VOL2_Part1 section 2.5.11 Format Conversion (vertex fetch). * - VOL4_Part1 section 2.12.2.1.2 Sampler Output Channel Mapping. * - VOL4_Part1 section 3.9.11 Render Target Write. + * - Render Target Surface Types [SKL+] */ const struct brw_surface_format_info surface_formats[] = { -/* smpl filt shad CK RT AB VB SO color */ - SF( Y, 50, x, x, Y, Y, Y, Y, x, R32G32B32A32_FLOAT) - SF( Y, x, x, x, Y, x, Y, Y, x, R32G32B32A32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, R32G32B32A32_UINT) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, R64G64_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, R32G32B32X32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_USCALED) - SF( x, x, x, x, x, x, x, x, x, R32G32B32A32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, R64G64_PASSTHRU) - SF( Y, 50, x, x, x, x, Y, Y, x, R32G32B32_FLOAT) - SF( Y, x, x, x, x, x, Y, Y, x, R32G32B32_SINT) - SF( Y, x, x, x, x, x, Y, Y, x, R32G32B32_UINT) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32_USCALED) - SF( x, x, x, x, x, x, x, x, x, R32G32B32_SFIXED) - SF( Y, Y, x, x, Y, 45, Y, x, 60, R16G16B16A16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, R16G16B16A16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R16G16B16A16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R16G16B16A16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, R16G16B16A16_FLOAT) - SF( Y, 50, x, x, Y, Y, Y, Y, x, R32G32_FLOAT) - SF( Y, 70, x, x, Y, Y, Y, Y, x, R32G32_FLOAT_LD) - SF( Y, x, x, x, Y, x, Y, Y, x, R32G32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, R32G32_UINT) - SF( Y, 50, Y, x, x, x, x, x, x, R32_FLOAT_X8X24_TYPELESS) - SF( Y, x, x, x, x, x, x, x, x, X32_TYPELESS_G8X24_UINT) - SF( Y, 50, x, x, x, x, x, x, x, L32A32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R32G32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R32G32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, R64_FLOAT) - SF( Y, Y, x, x, x, x, x, x, x, R16G16B16X16_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, R16G16B16X16_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, A32X32_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, L32X32_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, I32X32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16A16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16A16_USCALED) - SF( x, x, x, x, x, x, Y, x, x, R32G32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R32G32_USCALED) - SF( x, x, x, x, x, x, x, x, x, R32G32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, R64_PASSTHRU) - SF( Y, Y, x, Y, Y, Y, Y, x, 60, B8G8R8A8_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, B8G8R8A8_UNORM_SRGB) -/* smpl filt shad CK RT AB VB SO color */ - SF( Y, Y, x, x, Y, Y, Y, x, 60, R10G10B10A2_UNORM) - SF( Y, Y, x, x, x, x, x, x, 60, R10G10B10A2_UNORM_SRGB) - SF( Y, x, x, x, Y, x, Y, x, x, R10G10B10A2_UINT) - SF( Y, Y, x, x, x, Y, Y, x, x, R10G10B10_SNORM_A2_UNORM) - SF( Y, Y, x, x, Y, Y, Y, x, 60, R8G8B8A8_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, 60, R8G8B8A8_UNORM_SRGB) - SF( Y, Y, x, x, Y, 60, Y, x, x, R8G8B8A8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R8G8B8A8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R8G8B8A8_UINT) - SF( Y, Y, x, x, Y, 45, Y, x, x, R16G16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, R16G16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R16G16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R16G16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, R16G16_FLOAT) - SF( Y, Y, x, x, Y, Y, x, x, 60, B10G10R10A2_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, 60, B10G10R10A2_UNORM_SRGB) - SF( Y, Y, x, x, Y, Y, Y, x, x, R11G11B10_FLOAT) - SF( Y, x, x, x, Y, x, Y, Y, x, R32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, R32_UINT) - SF( Y, 50, Y, x, Y, Y, Y, Y, x, R32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, R24_UNORM_X8_TYPELESS) - SF( Y, x, x, x, x, x, x, x, x, X24_TYPELESS_G8_UINT) - SF( Y, Y, x, x, x, x, x, x, x, L16A16_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, I24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, L24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, A24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, I32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, L32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, A32_FLOAT) - SF( Y, Y, x, Y, x, x, x, x, 60, B8G8R8X8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, B8G8R8X8_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, R8G8B8X8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP) - SF( Y, Y, x, x, x, x, x, x, x, B10G10R10X2_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, L16A16_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R32_SNORM) -/* smpl filt shad CK RT AB VB SO color */ - SF( x, x, x, x, x, x, Y, x, x, R10G10B10X2_USCALED) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8A8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8A8_USCALED) - SF( x, x, x, x, x, x, Y, x, x, R16G16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R16G16_USCALED) - SF( x, x, x, x, x, x, Y, x, x, R32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R32_USCALED) - SF( Y, Y, x, Y, Y, Y, x, x, x, B5G6R5_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, B5G6R5_UNORM_SRGB) - SF( Y, Y, x, Y, Y, Y, x, x, x, B5G5R5A1_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, B5G5R5A1_UNORM_SRGB) - SF( Y, Y, x, Y, Y, Y, x, x, x, B4G4R4A4_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, B4G4R4A4_UNORM_SRGB) - SF( Y, Y, x, x, Y, Y, Y, x, x, R8G8_UNORM) - SF( Y, Y, x, Y, Y, 60, Y, x, x, R8G8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R8G8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R8G8_UINT) - SF( Y, Y, Y, x, Y, 45, Y, x, 70, R16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, R16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, R16_FLOAT) - SF(50, 50, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE0) - SF(50, 50, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE1) - SF( Y, Y, Y, x, x, x, x, x, x, I16_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, L16_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, A16_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, L8A8_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, I16_FLOAT) - SF( Y, Y, Y, x, x, x, x, x, x, L16_FLOAT) - SF( Y, Y, Y, x, x, x, x, x, x, A16_FLOAT) - SF(45, 45, x, x, x, x, x, x, x, L8A8_UNORM_SRGB) - SF( Y, Y, x, Y, x, x, x, x, x, R5G5_SNORM_B6_UNORM) - SF( x, x, x, x, Y, Y, x, x, x, B5G5R5X1_UNORM) - SF( x, x, x, x, Y, Y, x, x, x, B5G5R5X1_UNORM_SRGB) - SF( x, x, x, x, x, x, Y, x, x, R8G8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R8G8_USCALED) -/* smpl filt shad CK RT AB VB SO color */ - SF( x, x, x, x, x, x, Y, x, x, R16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R16_USCALED) - SF(50, 50, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0) - SF(50, 50, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1) - SF( x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM) - SF( x, x, x, x, x, x, x, x, x, A4B4G4R4_UNORM) - SF( x, x, x, x, x, x, x, x, x, L8A8_UINT) - SF( x, x, x, x, x, x, x, x, x, L8A8_SINT) - SF( Y, Y, x, 45, Y, Y, Y, x, x, R8_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, R8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R8_UINT) - SF( Y, Y, x, Y, Y, Y, x, x, x, A8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, I8_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, L8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, P4A4_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, A4P4_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R8_USCALED) - SF(45, 45, x, x, x, x, x, x, x, P8_UNORM_PALETTE0) - SF(45, 45, x, x, x, x, x, x, x, L8_UNORM_SRGB) - SF(45, 45, x, x, x, x, x, x, x, P8_UNORM_PALETTE1) - SF(45, 45, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE1) - SF(45, 45, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE1) - SF( x, x, x, x, x, x, x, x, x, Y8_SNORM) - SF( x, x, x, x, x, x, x, x, x, L8_UINT) - SF( x, x, x, x, x, x, x, x, x, L8_SINT) - SF( x, x, x, x, x, x, x, x, x, I8_UINT) - SF( x, x, x, x, x, x, x, x, x, I8_SINT) - SF(45, 45, x, x, x, x, x, x, x, DXT1_RGB_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, R1_UINT) - SF( Y, Y, x, Y, Y, x, x, x, 60, YCRCB_NORMAL) - SF( Y, Y, x, Y, Y, x, x, x, 60, YCRCB_SWAPUVY) - SF(45, 45, x, x, x, x, x, x, x, P2_UNORM_PALETTE0) - SF(45, 45, x, x, x, x, x, x, x, P2_UNORM_PALETTE1) - SF( Y, Y, x, Y, x, x, x, x, x, BC1_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, BC2_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, BC3_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BC4_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BC5_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BC1_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, BC2_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, BC3_UNORM_SRGB) - SF( Y, x, x, x, x, x, x, x, x, MONO8) - SF( Y, Y, x, x, Y, x, x, x, 60, YCRCB_SWAPUV) - SF( Y, Y, x, x, Y, x, x, x, 60, YCRCB_SWAPY) - SF( Y, Y, x, x, x, x, x, x, x, DXT1_RGB) -/* smpl filt shad CK RT AB VB SO color */ - SF( Y, Y, x, x, x, x, x, x, x, FXT1) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8_SNORM) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8_USCALED) - SF( x, x, x, x, x, x, Y, x, x, R64G64B64A64_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R64G64B64_FLOAT) - SF( Y, Y, x, x, x, x, x, x, x, BC4_SNORM) - SF( Y, Y, x, x, x, x, x, x, x, BC5_SNORM) - SF(50, 50, x, x, x, x, 60, x, x, R16G16B16_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16_SNORM) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16_USCALED) - SF(70, 70, x, x, x, x, x, x, x, BC6H_SF16) - SF(70, 70, x, x, x, x, x, x, x, BC7_UNORM) - SF(70, 70, x, x, x, x, x, x, x, BC7_UNORM_SRGB) - SF(70, 70, x, x, x, x, x, x, x, BC6H_UF16) - SF( x, x, x, x, x, x, x, x, x, PLANAR_420_8) - SF( x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB) - SF( x, x, x, x, x, x, x, x, x, ETC1_RGB8) - SF( x, x, x, x, x, x, x, x, x, ETC2_RGB8) - SF( x, x, x, x, x, x, x, x, x, EAC_R11) - SF( x, x, x, x, x, x, x, x, x, EAC_RG11) - SF( x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11) - SF( x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11) - SF( x, x, x, x, x, x, x, x, x, ETC2_SRGB8) - SF( x, x, x, x, x, x, x, x, x, R16G16B16_UINT) - SF( x, x, x, x, x, x, x, x, x, R16G16B16_SINT) - SF( x, x, x, x, x, x, x, x, x, R32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SNORM) - SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_USCALED) - SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SSCALED) - SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SINT) - SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SNORM) - SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_USCALED) - SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SSCALED) - SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_UINT) - SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SINT) - SF( x, x, x, x, x, x, x, x, x, R64G64B64A64_PASSTHRU) - SF( x, x, x, x, x, x, x, x, x, R64G64B64_PASSTHRU) - SF( x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA) - SF( x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA) - SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8) - SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8) - SF( x, x, x, x, x, x, x, x, x, R8G8B8_UINT) - SF( x, x, x, x, x, x, x, x, x, R8G8B8_SINT) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_U8sRGB) +/* smpl filt shad CK RT AB VB SO color ccs_e */ + SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32B32A32_FLOAT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_UINT) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R64G64_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, x, R32G32B32X32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_USCALED) + SF( x, x, x, x, x, x, x, x, x, x, R32G32B32A32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, x, R64G64_PASSTHRU) + SF( Y, 50, x, x, x, x, Y, Y, x, x, R32G32B32_FLOAT) + SF( Y, x, x, x, x, x, Y, Y, x, x, R32G32B32_SINT) + SF( Y, x, x, x, x, x, Y, Y, x, x, R32G32B32_UINT) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_USCALED) + SF( x, x, x, x, x, x, x, x, x, x, R32G32B32_SFIXED) + SF( Y, Y, x, x, Y, 45, Y, x, 60, 90, R16G16B16A16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R16G16B16A16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16B16A16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16B16A16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, 90, R16G16B16A16_FLOAT) + SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32_FLOAT) + SF( Y, 70, x, x, Y, Y, Y, Y, x, x, R32G32_FLOAT_LD) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32_UINT) + SF( Y, 50, Y, x, x, x, x, x, x, x, R32_FLOAT_X8X24_TYPELESS) + SF( Y, x, x, x, x, x, x, x, x, x, X32_TYPELESS_G8X24_UINT) + SF( Y, 50, x, x, x, x, x, x, x, x, L32A32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R64_FLOAT) + SF( Y, Y, x, x, x, x, x, x, x, x, R16G16B16X16_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, 90, R16G16B16X16_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, x, A32X32_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, x, L32X32_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, x, I32X32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16A16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16A16_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32_USCALED) + SF( x, x, x, x, x, x, x, x, x, x, R32G32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, x, R64_PASSTHRU) + SF( Y, Y, x, Y, Y, Y, Y, x, 60, 90, B8G8R8A8_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, B8G8R8A8_UNORM_SRGB) +/* smpl filt shad CK RT AB VB SO color ccs_e */ + SF( Y, Y, x, x, Y, Y, Y, x, 60, x, R10G10B10A2_UNORM) + SF( Y, Y, x, x, x, x, x, x, 60, x, R10G10B10A2_UNORM_SRGB) + SF( Y, x, x, x, Y, x, Y, x, x, x, R10G10B10A2_UINT) + SF( Y, Y, x, x, x, Y, Y, x, x, x, R10G10B10_SNORM_A2_UNORM) + SF( Y, Y, x, x, Y, Y, Y, x, 60, 90, R8G8B8A8_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, 60, x, R8G8B8A8_UNORM_SRGB) + SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R8G8B8A8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R8G8B8A8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R8G8B8A8_UINT) + SF( Y, Y, x, x, Y, 45, Y, x, x, 90, R16G16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R16G16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, 90, R16G16_FLOAT) + SF( Y, Y, x, x, Y, Y, x, x, 60, x, B10G10R10A2_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, 60, x, B10G10R10A2_UNORM_SRGB) + SF( Y, Y, x, x, Y, Y, Y, x, x, x, R11G11B10_FLOAT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32_UINT) + SF( Y, 50, Y, x, Y, Y, Y, Y, x, 90, R32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, x, R24_UNORM_X8_TYPELESS) + SF( Y, x, x, x, x, x, x, x, x, x, X24_TYPELESS_G8_UINT) + SF( Y, Y, x, x, x, x, x, x, x, x, L16A16_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, x, I24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, x, L24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, x, A24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, x, I32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, x, L32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, x, A32_FLOAT) + SF( Y, Y, x, Y, x, x, x, x, 60, 90, B8G8R8X8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, B8G8R8X8_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP) + SF( Y, Y, x, x, x, x, x, x, x, x, B10G10R10X2_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, L16A16_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R32_SNORM) +/* smpl filt shad CK RT AB VB SO color ccs_e */ + SF( x, x, x, x, x, x, Y, x, x, x, R10G10B10X2_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8A8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8A8_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32_USCALED) + SF( Y, Y, x, Y, Y, Y, x, x, x, x, B5G6R5_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, B5G6R5_UNORM_SRGB) + SF( Y, Y, x, Y, Y, Y, x, x, x, x, B5G5R5A1_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, B5G5R5A1_UNORM_SRGB) + SF( Y, Y, x, Y, Y, Y, x, x, x, x, B4G4R4A4_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, B4G4R4A4_UNORM_SRGB) + SF( Y, Y, x, x, Y, Y, Y, x, x, x, R8G8_UNORM) + SF( Y, Y, x, Y, Y, 60, Y, x, x, x, R8G8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, x, R8G8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, x, R8G8_UINT) + SF( Y, Y, Y, x, Y, 45, Y, x, 70, x, R16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, x, R16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, x, R16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, x, R16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, x, R16_FLOAT) + SF(50, 50, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE0) + SF(50, 50, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE1) + SF( Y, Y, Y, x, x, x, x, x, x, x, I16_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, x, L16_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, x, A16_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, L8A8_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, x, I16_FLOAT) + SF( Y, Y, Y, x, x, x, x, x, x, x, L16_FLOAT) + SF( Y, Y, Y, x, x, x, x, x, x, x, A16_FLOAT) + SF(45, 45, x, x, x, x, x, x, x, x, L8A8_UNORM_SRGB) + SF( Y, Y, x, Y, x, x, x, x, x, x, R5G5_SNORM_B6_UNORM) + SF( x, x, x, x, Y, Y, x, x, x, x, B5G5R5X1_UNORM) + SF( x, x, x, x, Y, Y, x, x, x, x, B5G5R5X1_UNORM_SRGB) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8_USCALED) +/* smpl filt shad CK RT AB VB SO color ccs_e */ + SF( x, x, x, x, x, x, Y, x, x, x, R16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R16_USCALED) + SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0) + SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM) + SF( x, x, x, x, x, x, x, x, x, x, A4B4G4R4_UNORM) + SF( x, x, x, x, x, x, x, x, x, x, L8A8_UINT) + SF( x, x, x, x, x, x, x, x, x, x, L8A8_SINT) + SF( Y, Y, x, 45, Y, Y, Y, x, x, x, R8_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, x, R8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, x, R8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, x, R8_UINT) + SF( Y, Y, x, Y, Y, Y, x, x, x, x, A8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, I8_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, L8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, P4A4_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, A4P4_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R8_USCALED) + SF(45, 45, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE0) + SF(45, 45, x, x, x, x, x, x, x, x, L8_UNORM_SRGB) + SF(45, 45, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE1) + SF(45, 45, x, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE1) + SF(45, 45, x, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, x, Y8_SNORM) + SF( x, x, x, x, x, x, x, x, x, x, L8_UINT) + SF( x, x, x, x, x, x, x, x, x, x, L8_SINT) + SF( x, x, x, x, x, x, x, x, x, x, I8_UINT) + SF( x, x, x, x, x, x, x, x, x, x, I8_SINT) + SF(45, 45, x, x, x, x, x, x, x, x, DXT1_RGB_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, R1_UINT) + SF( Y, Y, x, Y, Y, x, x, x, 60, x, YCRCB_NORMAL) + SF( Y, Y, x, Y, Y, x, x, x, 60, x, YCRCB_SWAPUVY) + SF(45, 45, x, x, x, x, x, x, x, x, P2_UNORM_PALETTE0) + SF(45, 45, x, x, x, x, x, x, x, x, P2_UNORM_PALETTE1) + SF( Y, Y, x, Y, x, x, x, x, x, x, BC1_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, BC2_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, BC3_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, BC4_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, BC5_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, BC1_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, BC2_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, BC3_UNORM_SRGB) + SF( Y, x, x, x, x, x, x, x, x, x, MONO8) + SF( Y, Y, x, x, Y, x, x, x, 60, x, YCRCB_SWAPUV) + SF( Y, Y, x, x, Y, x, x, x, 60, x, YCRCB_SWAPY) + SF( Y, Y, x, x, x, x, x, x, x, x, DXT1_RGB) +/* smpl filt shad CK RT AB VB SO color ccs_e */ + SF( Y, Y, x, x, x, x, x, x, x, x, FXT1) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R64G64B64A64_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R64G64B64_FLOAT) + SF( Y, Y, x, x, x, x, x, x, x, x, BC4_SNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, BC5_SNORM) + SF(50, 50, x, x, x, x, 60, x, x, x, R16G16B16_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_USCALED) + SF(70, 70, x, x, x, x, x, x, x, x, BC6H_SF16) + SF(70, 70, x, x, x, x, x, x, x, x, BC7_UNORM) + SF(70, 70, x, x, x, x, x, x, x, x, BC7_UNORM_SRGB) + SF(70, 70, x, x, x, x, x, x, x, x, BC6H_UF16) + SF( x, x, x, x, x, x, x, x, x, x, PLANAR_420_8) + SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB) + SF( x, x, x, x, x, x, x, x, x, x, ETC1_RGB8) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_RGB8) + SF( x, x, x, x, x, x, x, x, x, x, EAC_R11) + SF( x, x, x, x, x, x, x, x, x, x, EAC_RG11) + SF( x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11) + SF( x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8) + SF( x, x, x, x, x, x, x, x, x, x, R16G16B16_UINT) + SF( x, x, x, x, x, x, x, x, x, x, R16G16B16_SINT) + SF( x, x, x, x, x, x, x, x, x, x, R32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, x, R10G10B10A2_SNORM) + SF( x, x, x, x, x, x, x, x, x, x, R10G10B10A2_USCALED) + SF( x, x, x, x, x, x, x, x, x, x, R10G10B10A2_SSCALED) + SF( x, x, x, x, x, x, x, x, x, x, R10G10B10A2_SINT) + SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_SNORM) + SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_USCALED) + SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_SSCALED) + SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_UINT) + SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_SINT) + SF( x, x, x, x, x, x, x, x, x, x, R64G64B64A64_PASSTHRU) + SF( x, x, x, x, x, x, x, x, x, x, R64G64B64_PASSTHRU) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8) + SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_UINT) + SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_SINT) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_U8sRGB) }; #undef x #undef Y @@ -771,6 +773,26 @@ brw_render_target_supported(struct brw_context *brw, return brw->format_supported_as_render_target[format]; } +/* + * True if the underlying hardware format can support lossless color + * compression. + */ +bool +brw_losslessly_compressible_format(struct brw_context *brw, + uint32_t brw_format) +{ + const struct brw_surface_format_info * const sinfo = + &surface_formats[brw_format]; + const int gen = brw->gen * 10; + + assert(brw->gen >= 9); + + if (gen >= sinfo->lossless_compression) + return true; + + return false; +} + GLuint translate_tex_format(struct brw_context *brw, mesa_format mesa_format, diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.h b/src/mesa/drivers/dri/i965/brw_surface_formats.h index 5c7b60e680b..a5cd49f5260 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.h +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.h @@ -34,6 +34,7 @@ struct brw_surface_format_info { int input_vb; int streamed_output_vb; int color_processing; + int lossless_compression; const char *name; }; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index a086b43e11a..ae3cf728443 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -71,51 +71,6 @@ src_reg::src_reg() init(); } -src_reg::src_reg(float f) -{ - init(); - - this->file = IMM; - this->type = BRW_REGISTER_TYPE_F; - this->f = f; -} - -src_reg::src_reg(uint32_t u) -{ - init(); - - this->file = IMM; - this->type = BRW_REGISTER_TYPE_UD; - this->ud = u; -} - -src_reg::src_reg(int32_t i) -{ - init(); - - this->file = IMM; - this->type = BRW_REGISTER_TYPE_D; - this->d = i; -} - -src_reg::src_reg(uint8_t vf[4]) -{ - init(); - - this->file = IMM; - this->type = BRW_REGISTER_TYPE_VF; - memcpy(&this->ud, vf, sizeof(unsigned)); -} - -src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3) -{ - init(); - - this->file = IMM; - this->type = BRW_REGISTER_TYPE_VF; - this->ud = (vf0 << 0) | (vf1 << 8) | (vf2 << 16) | (vf3 << 24); -} - src_reg::src_reg(struct brw_reg reg) : backend_reg(reg) { @@ -382,7 +337,9 @@ vec4_visitor::opt_vector_float() remaining_channels &= ~inst->dst.writemask; if (remaining_channels == 0) { - vec4_instruction *mov = MOV(inst->dst, imm); + unsigned vf; + memcpy(&vf, imm, sizeof(vf)); + vec4_instruction *mov = MOV(inst->dst, brw_imm_vf(vf)); mov->dst.type = BRW_REGISTER_TYPE_F; mov->dst.writemask = WRITEMASK_XYZW; inst->insert_after(block, mov); @@ -657,13 +614,13 @@ vec4_visitor::opt_algebraic() inst->opcode = BRW_OPCODE_MOV; switch (inst->src[0].type) { case BRW_REGISTER_TYPE_F: - inst->src[0] = src_reg(0.0f); + inst->src[0] = brw_imm_f(0.0f); break; case BRW_REGISTER_TYPE_D: - inst->src[0] = src_reg(0); + inst->src[0] = brw_imm_d(0); break; case BRW_REGISTER_TYPE_UD: - inst->src[0] = src_reg(0u); + inst->src[0] = brw_imm_ud(0u); break; default: unreachable("not reached"); @@ -1232,7 +1189,7 @@ vec4_visitor::eliminate_find_live_channel() case SHADER_OPCODE_FIND_LIVE_CHANNEL: if (depth == 0) { inst->opcode = BRW_OPCODE_MOV; - inst->src[0] = src_reg(0); + inst->src[0] = brw_imm_d(0); inst->force_writemask_all = true; progress = true; } @@ -1701,7 +1658,7 @@ vec4_visitor::emit_shader_time_end() */ src_reg reset_end = shader_end_time; reset_end.swizzle = BRW_SWIZZLE_ZZZZ; - vec4_instruction *test = emit(AND(dst_null_d(), reset_end, src_reg(1u))); + vec4_instruction *test = emit(AND(dst_null_ud(), reset_end, brw_imm_ud(1u))); test->conditional_mod = BRW_CONDITIONAL_Z; emit(IF(BRW_PREDICATE_NORMAL)); @@ -1715,12 +1672,12 @@ vec4_visitor::emit_shader_time_end() * is 2 cycles. Remove that overhead, so I can forget about that when * trying to determine the time taken for single instructions. */ - emit(ADD(diff, src_reg(diff), src_reg(-2u))); + emit(ADD(diff, src_reg(diff), brw_imm_ud(-2u))); emit_shader_time_write(0, src_reg(diff)); - emit_shader_time_write(1, src_reg(1u)); + emit_shader_time_write(1, brw_imm_ud(1u)); emit(BRW_OPCODE_ELSE); - emit_shader_time_write(2, src_reg(1u)); + emit_shader_time_write(2, brw_imm_ud(1u)); emit(BRW_OPCODE_ENDIF); } @@ -1736,7 +1693,7 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value) offset.type = BRW_REGISTER_TYPE_UD; int index = shader_time_index * 3 + shader_time_subindex; - emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE))); + emit(MOV(offset, brw_imm_d(index * SHADER_TIME_STRIDE))); time.type = BRW_REGISTER_TYPE_UD; emit(MOV(time, value)); @@ -1762,11 +1719,6 @@ vec4_visitor::convert_to_hw_regs() reg.negate = src.negate; break; - case IMM: - reg = brw_imm_reg(src.type); - reg.ud = src.ud; - break; - case UNIFORM: reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg + (src.nr + src.reg_offset) / 2, @@ -1783,6 +1735,7 @@ vec4_visitor::convert_to_hw_regs() case ARF: case FIXED_GRF: + case IMM: continue; case BAD_FILE: @@ -1978,13 +1931,19 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const struct brw_vs_prog_key *key, struct brw_vs_prog_data *prog_data, - const nir_shader *shader, + const nir_shader *src_shader, gl_clip_plane *clip_planes, bool use_legacy_snorm_formula, int shader_time_index, unsigned *final_assembly_size, char **error_str) { + nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); + shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, + compiler->scalar_stage[MESA_SHADER_VERTEX]); + shader = brw_postprocess_nir(shader, compiler->devinfo, + compiler->scalar_stage[MESA_SHADER_VERTEX]); + const unsigned *assembly = NULL; unsigned nr_attributes = _mesa_bitcount_64(prog_data->inputs_read); @@ -2002,7 +1961,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in * vec4 mode, the hardware appears to wedge unless we read something. */ - if (compiler->scalar_vs) + if (compiler->scalar_stage[MESA_SHADER_VERTEX]) prog_data->base.urb_read_length = DIV_ROUND_UP(nr_attributes, 2); else prog_data->base.urb_read_length = DIV_ROUND_UP(MAX2(nr_attributes, 1), 2); @@ -2021,7 +1980,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, else prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4); - if (compiler->scalar_vs) { + if (compiler->scalar_stage[MESA_SHADER_VERTEX]) { prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; fs_visitor v(compiler, log_data, mem_ctx, key, &prog_data->base.base, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 52d68c5a33d..f94f7128a07 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -276,14 +276,9 @@ public: uint32_t surface, src_reg surface_reg, uint32_t sampler, src_reg sampler_reg); - uint32_t gather_channel(unsigned gather_component, - uint32_t surface, uint32_t sampler); src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate, src_reg sampler); void emit_gen6_gather_wa(uint8_t wa, dst_reg dst); - void swizzle_result(ir_texture_opcode op, dst_reg dest, - src_reg orig_val, uint32_t sampler, - const glsl_type *dest_type); void emit_ndc_computation(); void emit_psiz_and_flags(dst_reg reg); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_builder.h index a76a4ce4639..be1427c7db7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_builder.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_builder.h @@ -484,7 +484,7 @@ namespace brw { const dst_reg x_times_one_minus_a = vgrf(dst.type); MUL(y_times_a, y, a); - ADD(one_minus_a, negate(a), src_reg(1.0f)); + ADD(one_minus_a, negate(a), brw_imm_f(1.0f)); MUL(x_times_one_minus_a, x, src_reg(one_minus_a)); return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 1a09f76a20c..b13d36e2c7d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -30,6 +30,7 @@ #include "brw_vec4_gs_visitor.h" #include "gen6_gs_visitor.h" #include "brw_fs.h" +#include "brw_nir.h" namespace brw { @@ -153,7 +154,7 @@ vec4_gs_visitor::emit_prolog() */ this->current_annotation = "clear r0.2"; dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD)); - vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, 0u); + vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, brw_imm_ud(0u)); inst->force_writemask_all = true; /* Create a virtual register to hold the vertex count */ @@ -161,7 +162,7 @@ vec4_gs_visitor::emit_prolog() /* Initialize the vertex_count register to 0 */ this->current_annotation = "initialize vertex_count"; - inst = emit(MOV(dst_reg(this->vertex_count), 0u)); + inst = emit(MOV(dst_reg(this->vertex_count), brw_imm_ud(0u))); inst->force_writemask_all = true; if (c->control_data_header_size_bits > 0) { @@ -176,7 +177,7 @@ vec4_gs_visitor::emit_prolog() */ if (c->control_data_header_size_bits <= 32) { this->current_annotation = "initialize control data bits"; - inst = emit(MOV(dst_reg(this->control_data_bits), 0u)); + inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u))); inst->force_writemask_all = true; } } @@ -274,7 +275,7 @@ vec4_gs_visitor::emit_urb_write_header(int mrf) vec4_instruction *inst = emit(MOV(mrf_reg, r0)); inst->force_writemask_all = true; emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count, - (uint32_t) gs_prog_data->output_vertex_size_hwords); + brw_imm_ud(gs_prog_data->output_vertex_size_hwords)); } @@ -354,11 +355,12 @@ vec4_gs_visitor::emit_control_data_bits() src_reg dword_index(this, glsl_type::uint_type); if (urb_write_flags) { src_reg prev_count(this, glsl_type::uint_type); - emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu)); + emit(ADD(dst_reg(prev_count), this->vertex_count, + brw_imm_ud(0xffffffffu))); unsigned log2_bits_per_vertex = _mesa_fls(c->control_data_bits_per_vertex); emit(SHR(dst_reg(dword_index), prev_count, - (uint32_t) (6 - log2_bits_per_vertex))); + brw_imm_ud(6 - log2_bits_per_vertex))); } /* Start building the URB write message. The first MRF gets a copy of @@ -375,8 +377,9 @@ vec4_gs_visitor::emit_control_data_bits() * the appropriate OWORD within the control data header. */ src_reg per_slot_offset(this, glsl_type::uint_type); - emit(SHR(dst_reg(per_slot_offset), dword_index, 2u)); - emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u); + emit(SHR(dst_reg(per_slot_offset), dword_index, brw_imm_ud(2u))); + emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, + brw_imm_ud(1u)); } if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) { @@ -388,10 +391,10 @@ vec4_gs_visitor::emit_control_data_bits() * together. */ src_reg channel(this, glsl_type::uint_type); - inst = emit(AND(dst_reg(channel), dword_index, 3u)); + inst = emit(AND(dst_reg(channel), dword_index, brw_imm_ud(3u))); inst->force_writemask_all = true; src_reg one(this, glsl_type::uint_type); - inst = emit(MOV(dst_reg(one), 1u)); + inst = emit(MOV(dst_reg(one), brw_imm_ud(1u))); inst->force_writemask_all = true; src_reg channel_mask(this, glsl_type::uint_type); inst = emit(SHL(dst_reg(channel_mask), one, channel)); @@ -441,11 +444,11 @@ vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id) /* reg::sid = stream_id */ src_reg sid(this, glsl_type::uint_type); - emit(MOV(dst_reg(sid), stream_id)); + emit(MOV(dst_reg(sid), brw_imm_ud(stream_id))); /* reg:shift_count = 2 * (vertex_count - 1) */ src_reg shift_count(this, glsl_type::uint_type); - emit(SHL(dst_reg(shift_count), this->vertex_count, 1u)); + emit(SHL(dst_reg(shift_count), this->vertex_count, brw_imm_ud(1u))); /* Note: we're relying on the fact that the GEN SHL instruction only pays * attention to the lower 5 bits of its second source argument, so on this @@ -503,8 +506,8 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) * vertex_count & (32 / bits_per_vertex - 1) == 0 */ vec4_instruction *inst = - emit(AND(dst_null_d(), this->vertex_count, - (uint32_t) (32 / c->control_data_bits_per_vertex - 1))); + emit(AND(dst_null_ud(), this->vertex_count, + brw_imm_ud(32 / c->control_data_bits_per_vertex - 1))); inst->conditional_mod = BRW_CONDITIONAL_Z; emit(IF(BRW_PREDICATE_NORMAL)); @@ -512,7 +515,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) /* If vertex_count is 0, then no control data bits have been * accumulated yet, so we skip emitting them. */ - emit(CMP(dst_null_d(), this->vertex_count, 0u, + emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_NEQ)); emit(IF(BRW_PREDICATE_NORMAL)); emit_control_data_bits(); @@ -525,7 +528,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) * effect of any call to EndPrimitive() that the shader may have * made before outputting its first vertex. */ - inst = emit(MOV(dst_reg(this->control_data_bits), 0u)); + inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u))); inst->force_writemask_all = true; } emit(BRW_OPCODE_ENDIF); @@ -586,9 +589,9 @@ vec4_gs_visitor::gs_end_primitive() /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */ src_reg one(this, glsl_type::uint_type); - emit(MOV(dst_reg(one), 1u)); + emit(MOV(dst_reg(one), brw_imm_ud(1u))); src_reg prev_count(this, glsl_type::uint_type); - emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu)); + emit(ADD(dst_reg(prev_count), this->vertex_count, brw_imm_ud(0xffffffffu))); src_reg mask(this, glsl_type::uint_type); /* Note: we're relying on the fact that the GEN SHL instruction only pays * attention to the lower 5 bits of its second source argument, so on this @@ -604,7 +607,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const struct brw_gs_prog_key *key, struct brw_gs_prog_data *prog_data, - const nir_shader *shader, + const nir_shader *src_shader, struct gl_shader_program *shader_prog, int shader_time_index, unsigned *final_assembly_size, @@ -614,6 +617,12 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, memset(&c, 0, sizeof(c)); c.key = *key; + nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); + shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, + compiler->scalar_stage[MESA_SHADER_GEOMETRY]); + shader = brw_postprocess_nir(shader, compiler->devinfo, + compiler->scalar_stage[MESA_SHADER_GEOMETRY]); + prog_data->include_primitive_id = (shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) != 0; @@ -773,7 +782,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, if (compiler->devinfo->gen == 6) max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES; if (output_size_bytes > max_output_size_bytes) - return false; + return NULL; /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and @@ -819,7 +828,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, brw_print_vue_map(stderr, &prog_data->base.vue_map); } - if (compiler->scalar_gs) { + if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) { /* TODO: Support instanced GS. We have basically no tests... */ assert(prog_data->invocations == 1); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index bf098b41590..260b515ad42 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -122,7 +122,7 @@ vec4_visitor::nir_setup_inputs() { nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs); for (unsigned i = 0; i < nir->num_inputs; i++) { - nir_inputs[i] = dst_reg(); + nir_inputs[i] = src_reg(); } nir_foreach_variable(var, &nir->inputs) { @@ -373,7 +373,7 @@ vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr) } reg.writemask = writemask; - emit(MOV(reg, src_reg(instr->value.i[i]))); + emit(MOV(reg, brw_imm_d(instr->value.i[i]))); remaining &= ~writemask; } @@ -444,10 +444,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) inst->base_mrf = 2; inst->mlen = 1; /* always at least one */ - inst->src[1] = src_reg(index); + inst->src[1] = brw_imm_ud(index); /* MRF for the first parameter */ - src_reg lod = src_reg(0); + src_reg lod = brw_imm_d(0); int param_base = inst->base_mrf; int writemask = WRITEMASK_X; emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod)); @@ -471,12 +471,12 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) if (const_uniform_block) { unsigned index = prog_data->base.binding_table.ssbo_start + const_uniform_block->u[0]; - surf_index = src_reg(index); + surf_index = brw_imm_ud(index); brw_mark_surface_used(&prog_data->base, index); } else { surf_index = src_reg(this, glsl_type::uint_type); emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1), - src_reg(prog_data->base.binding_table.ssbo_start))); + brw_imm_ud(prog_data->base.binding_table.ssbo_start))); surf_index = emit_uniformize(surf_index); brw_mark_surface_used(&prog_data->base, @@ -491,7 +491,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1))); } else { const_offset_bytes = instr->const_index[0]; - emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes))); + emit(MOV(dst_reg(offset_reg), brw_imm_ud(const_offset_bytes))); } /* Value */ @@ -566,7 +566,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) if (skipped_channels > 0) { if (!has_indirect) { const_offset_bytes += 4 * skipped_channels; - offset_reg = src_reg(const_offset_bytes); + offset_reg = brw_imm_ud(const_offset_bytes); } else { emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(4 * skipped_channels))); @@ -614,13 +614,13 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) if (const_uniform_block) { unsigned index = prog_data->base.binding_table.ssbo_start + const_uniform_block->u[0]; - surf_index = src_reg(index); + surf_index = brw_imm_ud(index); brw_mark_surface_used(&prog_data->base, index); } else { surf_index = src_reg(this, glsl_type::uint_type); emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1), - src_reg(prog_data->base.binding_table.ssbo_start))); + brw_imm_ud(prog_data->base.binding_table.ssbo_start))); surf_index = emit_uniformize(surf_index); /* Assume this may touch any UBO. It would be nice to provide @@ -637,7 +637,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[1], 1))); } else { const_offset_bytes = instr->const_index[0]; - emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes))); + emit(MOV(dst_reg(offset_reg), brw_imm_ud((const_offset_bytes)))); } /* Read the vector */ @@ -762,7 +762,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ const unsigned index = prog_data->base.binding_table.ubo_start + const_block_index->u[0]; - surf_index = src_reg(index); + surf_index = brw_imm_ud(index); brw_mark_surface_used(&prog_data->base, index); } else { /* The block index is not a constant. Evaluate the index expression @@ -772,7 +772,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) surf_index = src_reg(this, glsl_type::uint_type); emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], nir_type_int, instr->num_components), - src_reg(prog_data->base.binding_table.ubo_start))); + brw_imm_ud(prog_data->base.binding_table.ubo_start))); surf_index = emit_uniformize(surf_index); /* Assume this may touch any UBO. It would be nice to provide @@ -787,11 +787,11 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src_reg offset; if (!has_indirect) { - offset = src_reg(const_offset / 16); + offset = brw_imm_ud(const_offset / 16); } else { offset = src_reg(this, glsl_type::uint_type); emit(SHR(dst_reg(offset), get_nir_src(instr->src[1], nir_type_int, 1), - src_reg(4u))); + brw_imm_ud(4u))); } src_reg packed_consts = src_reg(this, glsl_type::vec4_type); @@ -848,12 +848,12 @@ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr) if (const_surface) { unsigned surf_index = prog_data->base.binding_table.ssbo_start + const_surface->u[0]; - surface = src_reg(surf_index); + surface = brw_imm_ud(surf_index); brw_mark_surface_used(&prog_data->base, surf_index); } else { surface = src_reg(this, glsl_type::uint_type); emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]), - src_reg(prog_data->base.binding_table.ssbo_start))); + brw_imm_ud(prog_data->base.binding_table.ssbo_start))); /* Assume this may touch any UBO. This is the same we do for other * UBO/SSBO accesses with non-constant surface. @@ -1174,8 +1174,8 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz), brw_conditional_for_nir_comparison(instr->op))); - emit(MOV(dst, src_reg(0))); - inst = emit(MOV(dst, src_reg(~0))); + emit(MOV(dst, brw_imm_d(0))); + inst = emit(MOV(dst, brw_imm_d(~0))); inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; break; } @@ -1192,8 +1192,8 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz), brw_conditional_for_nir_comparison(instr->op))); - emit(MOV(dst, src_reg(0))); - inst = emit(MOV(dst, src_reg(~0))); + emit(MOV(dst, brw_imm_d(0))); + inst = emit(MOV(dst, brw_imm_d(~0))); inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; break; } @@ -1235,11 +1235,11 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_f2b: - emit(CMP(dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); + emit(CMP(dst, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ)); break; case nir_op_i2b: - emit(CMP(dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ)); + emit(CMP(dst, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ)); break; case nir_op_fnoise1_1: @@ -1321,9 +1321,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) * subtract the result from 31 to convert the MSB count into an LSB count. */ src_reg src(dst); - emit(CMP(dst_null_d(), src, src_reg(-1), BRW_CONDITIONAL_NZ)); + emit(CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ)); - inst = emit(ADD(dst, src, src_reg(31))); + inst = emit(ADD(dst, src, brw_imm_d(31))); inst->predicate = BRW_PREDICATE_NORMAL; inst->src[0].negate = true; break; @@ -1364,13 +1364,13 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not * zero. */ - emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); + emit(CMP(dst_null_f(), op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ)); op[0].type = BRW_REGISTER_TYPE_UD; dst.type = BRW_REGISTER_TYPE_UD; - emit(AND(dst, op[0], src_reg(0x80000000u))); + emit(AND(dst, op[0], brw_imm_ud(0x80000000u))); - inst = emit(OR(dst, src_reg(dst), src_reg(0x3f800000u))); + inst = emit(OR(dst, src_reg(dst), brw_imm_ud(0x3f800000u))); inst->predicate = BRW_PREDICATE_NORMAL; dst.type = BRW_REGISTER_TYPE_F; @@ -1385,9 +1385,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) * -> non-negative val generates 0x00000000. * Predicated OR sets 1 if val is positive. */ - emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G)); - emit(ASR(dst, op[0], src_reg(31))); - inst = emit(OR(dst, src_reg(dst), src_reg(1))); + emit(CMP(dst_null_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_G)); + emit(ASR(dst, op[0], brw_imm_d(31))); + inst = emit(OR(dst, src_reg(dst), brw_imm_d(1))); inst->predicate = BRW_PREDICATE_NORMAL; break; @@ -1418,7 +1418,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_bcsel: - emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); + emit(CMP(dst_null_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ)); inst = emit(BRW_OPCODE_SEL, dst, op[1], op[2]); switch (dst.writemask) { case WRITEMASK_X: @@ -1465,10 +1465,10 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) unsigned swiz = brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); - emit(CMP(dst_null_d(), swizzle(op[0], swiz), src_reg(0), + emit(CMP(dst_null_d(), swizzle(op[0], swiz), brw_imm_d(0), BRW_CONDITIONAL_NZ)); - emit(MOV(dst, src_reg(0))); - inst = emit(MOV(dst, src_reg(~0))); + emit(MOV(dst, brw_imm_d(0))); + inst = emit(MOV(dst, brw_imm_d(~0))); inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; break; } @@ -1502,7 +1502,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) BRW_NIR_BOOLEAN_NEEDS_RESOLVE) { dst_reg masked = dst_reg(this, glsl_type::int_type); masked.writemask = dst.writemask; - emit(AND(masked, src_reg(dst), src_reg(1))); + emit(AND(masked, src_reg(dst), brw_imm_d(1))); src_reg masked_neg = src_reg(masked); masked_neg.negate = true; emit(MOV(retype(dst, BRW_REGISTER_TYPE_D), masked_neg)); @@ -1551,6 +1551,7 @@ ir_texture_opcode_for_nir_texop(nir_texop texop) case nir_texop_txf_ms: op = ir_txf_ms; break; case nir_texop_txl: op = ir_txl; break; case nir_texop_txs: op = ir_txs; break; + case nir_texop_samples_identical: op = ir_samples_identical; break; default: unreachable("unknown texture opcode"); } @@ -1566,7 +1567,7 @@ glsl_type_for_nir_alu_type(nir_alu_type alu_type, return glsl_type::vec(components); case nir_type_int: return glsl_type::ivec(components); - case nir_type_unsigned: + case nir_type_uint: return glsl_type::uvec(components); case nir_type_bool: return glsl_type::bvec(components); @@ -1582,8 +1583,8 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) { unsigned texture = instr->texture_index; unsigned sampler = instr->sampler_index; - src_reg texture_reg = src_reg(texture); - src_reg sampler_reg = src_reg(sampler); + src_reg texture_reg = brw_imm_ud(texture); + src_reg sampler_reg = brw_imm_ud(sampler); src_reg coordinate; const glsl_type *coord_type = NULL; src_reg shadow_comparitor; @@ -1597,17 +1598,6 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) nir_tex_instr_dest_size(instr)); dst_reg dest = get_nir_dest(instr->dest, instr->dest_type); - /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother - * emitting anything other than setting up the constant result. - */ - if (instr->op == nir_texop_tg4) { - int swiz = GET_SWZ(key_tex->swizzles[sampler], instr->component); - if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) { - emit(MOV(dest, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f))); - return; - } - } - /* Load the texture operation sources */ for (unsigned i = 0; i < instr->num_srcs; i++) { switch (instr->src[i].src_type) { @@ -1622,6 +1612,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) switch (instr->op) { case nir_texop_txf: case nir_texop_txf_ms: + case nir_texop_samples_identical: coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, src_size); coord_type = glsl_type::ivec(src_size); @@ -1661,14 +1652,6 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) case nir_tex_src_ms_index: { sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1); - assert(coord_type != NULL); - if (devinfo->gen >= 7 && - key_tex->compressed_multisample_layout_mask & (1 << texture)) { - mcs = emit_mcs_fetch(coord_type, coordinate, texture_reg); - } else { - mcs = src_reg(0u); - } - mcs = retype(mcs, BRW_REGISTER_TYPE_UD); break; } @@ -1693,7 +1676,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) /* Emit code to evaluate the actual indexing expression */ src_reg src = get_nir_src(instr->src[i].src, 1); src_reg temp(this, glsl_type::uint_type); - emit(ADD(dst_reg(temp), src, src_reg(texture))); + emit(ADD(dst_reg(temp), src, brw_imm_ud(texture))); texture_reg = emit_uniformize(temp); break; } @@ -1702,7 +1685,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) /* Emit code to evaluate the actual indexing expression */ src_reg src = get_nir_src(instr->src[i].src, 1); src_reg temp(this, glsl_type::uint_type); - emit(ADD(dst_reg(temp), src, src_reg(sampler))); + emit(ADD(dst_reg(temp), src, brw_imm_ud(sampler))); sampler_reg = emit_uniformize(temp); break; } @@ -1718,6 +1701,17 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) } } + if (instr->op == nir_texop_txf_ms || + instr->op == nir_texop_samples_identical) { + assert(coord_type != NULL); + if (devinfo->gen >= 7 && + key_tex->compressed_multisample_layout_mask & (1 << sampler)) { + mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg); + } else { + mcs = brw_imm_ud(0u); + } + } + uint32_t constant_offset = 0; for (unsigned i = 0; i < 3; i++) { if (instr->const_offset[i] != 0) { @@ -1727,8 +1721,17 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) } /* Stuff the channel select bits in the top of the texture offset */ - if (instr->op == nir_texop_tg4) - constant_offset |= gather_channel(instr->component, texture, sampler) << 16; + if (instr->op == nir_texop_tg4) { + if (instr->component == 1 && + (key_tex->gather_channel_quirk_mask & (1 << texture))) { + /* gather4 sampler is broken for green channel on RG32F -- + * we must ask for blue instead. + */ + constant_offset |= 2 << 16; + } else { + constant_offset |= instr->component << 16; + } + } ir_texture_opcode op = ir_texture_opcode_for_nir_texop(instr->op); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp index a7c286d3ac1..28002c56cdc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp @@ -71,7 +71,7 @@ namespace { bld.MOV(writemask(tmp, mask), src); if (n < 4) - bld.MOV(writemask(tmp, ~mask), 0); + bld.MOV(writemask(tmp, ~mask), brw_imm_d(0)); return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1); } @@ -143,7 +143,7 @@ namespace brw { /* Emit the message send instruction. */ const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz); vec4_instruction *inst = - bld.emit(op, dst, src_reg(payload), usurface, arg); + bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg)); inst->mlen = sz; inst->regs_written = ret_sz; inst->header_size = header_sz; @@ -235,7 +235,7 @@ namespace brw { const vec4_builder ubld = bld.exec_all(); const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD); - ubld.MOV(dst, src_reg(0)); + ubld.MOV(dst, brw_imm_d(0)); if (bld.shader->devinfo->gen == 7 && !bld.shader->devinfo->is_haswell) { @@ -243,7 +243,7 @@ namespace brw { * have no SIMD4x2 variant. We only use the two X channels * in that case, mask everything else out. */ - ubld.MOV(writemask(dst, WRITEMASK_W), src_reg(0x11)); + ubld.MOV(writemask(dst, WRITEMASK_W), brw_imm_d(0x11)); } return src_reg(dst); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 6b8798da71c..caf1ee02bf0 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -408,7 +408,7 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0) * You should inspect the disasm output in order to verify that the MOV is * not optimized away. */ - emit(MOV(tmp_dst, src_reg(0x12345678u))); + emit(MOV(tmp_dst, brw_imm_ud(0x12345678u))); #endif /* Give tmp the form below, where "." means untouched. @@ -427,7 +427,7 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0) * 0xhhhh0000 */ tmp_src.swizzle = BRW_SWIZZLE_YYYY; - emit(SHL(dst, tmp_src, src_reg(16u))); + emit(SHL(dst, tmp_src, brw_imm_ud(16u))); /* Finally, give the write-channels of dst the form of packHalf2x16's * output: @@ -466,10 +466,10 @@ vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0) src_reg tmp_src(tmp_dst); tmp_dst.writemask = WRITEMASK_X; - emit(AND(tmp_dst, src0, src_reg(0xffffu))); + emit(AND(tmp_dst, src0, brw_imm_ud(0xffffu))); tmp_dst.writemask = WRITEMASK_Y; - emit(SHR(tmp_dst, src0, src_reg(16u))); + emit(SHR(tmp_dst, src0, brw_imm_ud(16u))); dst.writemask = WRITEMASK_XY; emit(F16TO32(dst, tmp_src)); @@ -484,7 +484,7 @@ vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0) * vector float and a type-converting MOV. */ dst_reg shift(this, glsl_type::uvec4_type); - emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78))); + emit(MOV(shift, brw_imm_vf4(0x00, 0x60, 0x70, 0x78))); dst_reg shifted(this, glsl_type::uvec4_type); src0.swizzle = BRW_SWIZZLE_XXXX; @@ -494,7 +494,7 @@ vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0) dst_reg f(this, glsl_type::vec4_type); emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted)); - emit(MUL(dst, src_reg(f), src_reg(1.0f / 255.0f))); + emit(MUL(dst, src_reg(f), brw_imm_f(1.0f / 255.0f))); } void @@ -506,7 +506,7 @@ vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0) * vector float and a type-converting MOV. */ dst_reg shift(this, glsl_type::uvec4_type); - emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78))); + emit(MOV(shift, brw_imm_vf4(0x00, 0x60, 0x70, 0x78))); dst_reg shifted(this, glsl_type::uvec4_type); src0.swizzle = BRW_SWIZZLE_XXXX; @@ -517,11 +517,11 @@ vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0) emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted)); dst_reg scaled(this, glsl_type::vec4_type); - emit(MUL(scaled, src_reg(f), src_reg(1.0f / 127.0f))); + emit(MUL(scaled, src_reg(f), brw_imm_f(1.0f / 127.0f))); dst_reg max(this, glsl_type::vec4_type); - emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), src_reg(-1.0f)); - emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), src_reg(1.0f)); + emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), brw_imm_f(-1.0f)); + emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), brw_imm_f(1.0f)); } void @@ -532,7 +532,7 @@ vec4_visitor::emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0) inst->saturate = true; dst_reg scaled(this, glsl_type::vec4_type); - emit(MUL(scaled, src_reg(saturated), src_reg(255.0f))); + emit(MUL(scaled, src_reg(saturated), brw_imm_f(255.0f))); dst_reg rounded(this, glsl_type::vec4_type); emit(RNDE(rounded, src_reg(scaled))); @@ -548,13 +548,13 @@ void vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0) { dst_reg max(this, glsl_type::vec4_type); - emit_minmax(BRW_CONDITIONAL_GE, max, src0, src_reg(-1.0f)); + emit_minmax(BRW_CONDITIONAL_GE, max, src0, brw_imm_f(-1.0f)); dst_reg min(this, glsl_type::vec4_type); - emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), src_reg(1.0f)); + emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), brw_imm_f(1.0f)); dst_reg scaled(this, glsl_type::vec4_type); - emit(MUL(scaled, src_reg(min), src_reg(127.0f))); + emit(MUL(scaled, src_reg(min), brw_imm_f(127.0f))); dst_reg rounded(this, glsl_type::vec4_type); emit(RNDE(rounded, src_reg(scaled))); @@ -716,7 +716,7 @@ vec4_visitor::emit_lrp(const dst_reg &dst, x_times_one_minus_a.writemask = dst.writemask; emit(MUL(y_times_a, y, a)); - emit(ADD(one_minus_a, negate(a), src_reg(1.0f))); + emit(ADD(one_minus_a, negate(a), brw_imm_f(1.0f))); emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a))); return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a))); } @@ -850,7 +850,7 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type, coordinate)); emit(MOV(dst_reg(MRF, param_base, coordinate_type, zero_mask), - src_reg(0))); + brw_imm_d(0))); emit(inst); return src_reg(inst->dst); @@ -892,7 +892,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op, */ if (op == ir_tex || op == ir_query_levels) { assert(lod.file == BAD_FILE); - lod = src_reg(0.0f); + lod = brw_imm_f(0.0f); } enum opcode opcode; @@ -912,12 +912,18 @@ vec4_visitor::emit_texture(ir_texture_opcode op, unreachable("TXB is not valid for vertex shaders."); case ir_lod: unreachable("LOD is not valid for vertex shaders."); + case ir_samples_identical: { + /* There are some challenges implementing this for vec4, and it seems + * unlikely to be used anyway. For now, just return false ways. + */ + emit(MOV(dest, brw_imm_ud(0u))); + return; + } default: unreachable("Unrecognized tex op"); } - vec4_instruction *inst = new(mem_ctx) vec4_instruction( - opcode, dst_reg(this, dest_type)); + vec4_instruction *inst = new(mem_ctx) vec4_instruction(opcode, dest); inst->offset = constant_offset; @@ -963,7 +969,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op, if (zero_mask != 0) { emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask), - src_reg(0))); + brw_imm_d(0))); } /* Load the shadow comparitor */ if (shadow_comparitor.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) { @@ -1062,15 +1068,20 @@ vec4_visitor::emit_texture(ir_texture_opcode op, if (op == ir_txs && is_cube_array) { emit_math(SHADER_OPCODE_INT_QUOTIENT, writemask(inst->dst, WRITEMASK_Z), - src_reg(inst->dst), src_reg(6)); + src_reg(inst->dst), brw_imm_d(6)); } if (devinfo->gen == 6 && op == ir_tg4) { emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], inst->dst); } - swizzle_result(op, dest, - src_reg(inst->dst), sampler, dest_type); + if (op == ir_query_levels) { + /* # levels is in .w */ + src_reg swizzled(dest); + swizzled.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, + SWIZZLE_W, SWIZZLE_W); + emit(MOV(dest, swizzled)); + } } /** @@ -1087,7 +1098,7 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst) dst_f.type = BRW_REGISTER_TYPE_F; /* Convert from UNORM to UINT */ - emit(MUL(dst_f, src_reg(dst_f), src_reg((float)((1 << width) - 1)))); + emit(MUL(dst_f, src_reg(dst_f), brw_imm_f((float)((1 << width) - 1)))); emit(MOV(dst, src_reg(dst_f))); if (wa & WA_SIGN) { @@ -1095,90 +1106,8 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst) * shifting the sign bit into place, then shifting back * preserving sign. */ - emit(SHL(dst, src_reg(dst), src_reg(32 - width))); - emit(ASR(dst, src_reg(dst), src_reg(32 - width))); - } -} - -/** - * Set up the gather channel based on the swizzle, for gather4. - */ -uint32_t -vec4_visitor::gather_channel(unsigned gather_component, - uint32_t surface, uint32_t sampler) -{ - int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component); - switch (swiz) { - case SWIZZLE_X: return 0; - case SWIZZLE_Y: - /* gather4 sampler is broken for green channel on RG32F -- - * we must ask for blue instead. - */ - if (key_tex->gather_channel_quirk_mask & (1 << surface)) - return 2; - return 1; - case SWIZZLE_Z: return 2; - case SWIZZLE_W: return 3; - default: - unreachable("Not reached"); /* zero, one swizzles handled already */ - } -} - -void -vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest, - src_reg orig_val, uint32_t sampler, - const glsl_type *dest_type) -{ - int s = key_tex->swizzles[sampler]; - - dst_reg swizzled_result = dest; - - if (op == ir_query_levels) { - /* # levels is in .w */ - orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); - emit(MOV(swizzled_result, orig_val)); - return; - } - - if (op == ir_txs || dest_type == glsl_type::float_type - || s == SWIZZLE_NOOP || op == ir_tg4) { - emit(MOV(swizzled_result, orig_val)); - return; - } - - - int zero_mask = 0, one_mask = 0, copy_mask = 0; - int swizzle[4] = {0}; - - for (int i = 0; i < 4; i++) { - switch (GET_SWZ(s, i)) { - case SWIZZLE_ZERO: - zero_mask |= (1 << i); - break; - case SWIZZLE_ONE: - one_mask |= (1 << i); - break; - default: - copy_mask |= (1 << i); - swizzle[i] = GET_SWZ(s, i); - break; - } - } - - if (copy_mask) { - orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); - swizzled_result.writemask = copy_mask; - emit(MOV(swizzled_result, orig_val)); - } - - if (zero_mask) { - swizzled_result.writemask = zero_mask; - emit(MOV(swizzled_result, src_reg(0.0f))); - } - - if (one_mask) { - swizzled_result.writemask = one_mask; - emit(MOV(swizzled_result, src_reg(1.0f))); + emit(SHL(dst, src_reg(dst), brw_imm_d(32 - width))); + emit(ASR(dst, src_reg(dst), brw_imm_d(32 - width))); } } @@ -1225,7 +1154,7 @@ vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, */ vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_payload, - src_reg(surf_index), src_reg(atomic_op)); + brw_imm_ud(surf_index), brw_imm_ud(atomic_op)); inst->mlen = mlen; } @@ -1245,7 +1174,7 @@ vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst, */ vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, src_reg(offset), - src_reg(surf_index), src_reg(1)); + brw_imm_ud(surf_index), brw_imm_d(1)); inst->mlen = 1; } @@ -1286,14 +1215,14 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) dst_reg header1_w = header1; header1_w.writemask = WRITEMASK_W; - emit(MOV(header1, 0u)); + emit(MOV(header1, brw_imm_ud(0u))); if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) { src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]); current_annotation = "Point size"; - emit(MUL(header1_w, psiz, src_reg((float)(1 << 11)))); - emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8)); + emit(MUL(header1_w, psiz, brw_imm_f((float)(1 << 11)))); + emit(AND(header1_w, src_reg(header1_w), brw_imm_d(0x7ff << 8))); } if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) { @@ -1301,13 +1230,13 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) dst_reg flags0 = dst_reg(this, glsl_type::uint_type); dst_reg flags1 = dst_reg(this, glsl_type::uint_type); - emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L)); - emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0)); + emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L)); + emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, brw_imm_d(0)); emit(OR(header1_w, src_reg(header1_w), src_reg(flags0))); - emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L)); - emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0)); - emit(SHL(flags1, src_reg(flags1), src_reg(4))); + emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), brw_imm_f(0.0f), BRW_CONDITIONAL_L)); + emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, brw_imm_d(0)); + emit(SHL(flags1, src_reg(flags1), brw_imm_d(4))); emit(OR(header1_w, src_reg(header1_w), src_reg(flags1))); } @@ -1324,20 +1253,20 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) { src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]); ndc_w.swizzle = BRW_SWIZZLE_WWWW; - emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L)); + emit(CMP(dst_null_f(), ndc_w, brw_imm_f(0.0f), BRW_CONDITIONAL_L)); vec4_instruction *inst; - inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6))); + inst = emit(OR(header1_w, src_reg(header1_w), brw_imm_ud(1u << 6))); inst->predicate = BRW_PREDICATE_NORMAL; output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F; - inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f))); + inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], brw_imm_f(0.0f))); inst->predicate = BRW_PREDICATE_NORMAL; } emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1))); } else if (devinfo->gen < 6) { - emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u)); + emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u))); } else { - emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0))); + emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), brw_imm_d(0))); if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) { dst_reg reg_w = reg; reg_w.writemask = WRITEMASK_W; @@ -1529,13 +1458,13 @@ vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst, src_reg index = src_reg(this, glsl_type::int_type); emit_before(block, inst, ADD(dst_reg(index), *reladdr, - src_reg(reg_offset))); + brw_imm_d(reg_offset))); emit_before(block, inst, MUL(dst_reg(index), index, - src_reg(message_header_scale))); + brw_imm_d(message_header_scale))); return index; } else { - return src_reg(reg_offset * message_header_scale); + return brw_imm_d(reg_offset * message_header_scale); } } @@ -1547,24 +1476,24 @@ vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst, src_reg index = src_reg(this, glsl_type::int_type); emit_before(block, inst, ADD(dst_reg(index), *reladdr, - src_reg(reg_offset))); + brw_imm_d(reg_offset))); /* Pre-gen6, the message header uses byte offsets instead of vec4 * (16-byte) offset units. */ if (devinfo->gen < 6) { - emit_before(block, inst, MUL(dst_reg(index), index, src_reg(16))); + emit_before(block, inst, MUL(dst_reg(index), index, brw_imm_d(16))); } return index; } else if (devinfo->gen >= 8) { /* Store the offset in a GRF so we can send-from-GRF. */ src_reg offset = src_reg(this, glsl_type::int_type); - emit_before(block, inst, MOV(dst_reg(offset), src_reg(reg_offset))); + emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset))); return offset; } else { int message_header_scale = devinfo->gen < 6 ? 16 : 1; - return src_reg(reg_offset * message_header_scale); + return brw_imm_d(reg_offset * message_header_scale); } } @@ -1753,7 +1682,7 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, reg_offset); emit_pull_constant_load_reg(temp, - src_reg(index), + brw_imm_ud(index), offset, block, inst); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index 5dd4f98cecc..fd8be7d972c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -50,7 +50,7 @@ vec4_vs_visitor::emit_prolog() dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1; - emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f))); + emit(MUL(dst, src_reg(dst), brw_imm_f(1.0f / 65536.0f))); } /* Do sign recovery for 2101010 formats if required. */ @@ -58,8 +58,8 @@ vec4_vs_visitor::emit_prolog() if (sign_recovery_shift.file == BAD_FILE) { /* shift constant: <22,22,22,30> */ sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type); - emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u))); - emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u))); + emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), brw_imm_ud(22u))); + emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), brw_imm_ud(30u))); } emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift))); @@ -87,16 +87,16 @@ vec4_vs_visitor::emit_prolog() /* mul constant: 1 / (2^(b-1) - 1) */ es3_normalize_factor = dst_reg(this, glsl_type::vec4_type); emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ), - src_reg(1.0f / ((1<<9) - 1)))); + brw_imm_f(1.0f / ((1<<9) - 1)))); emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W), - src_reg(1.0f / ((1<<1) - 1)))); + brw_imm_f(1.0f / ((1<<1) - 1)))); } dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); emit(MOV(dst, src_reg(reg_d))); emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor))); - emit_minmax(BRW_CONDITIONAL_GE, dst, src_reg(dst), src_reg(-1.0f)); + emit_minmax(BRW_CONDITIONAL_GE, dst, src_reg(dst), brw_imm_f(-1.0f)); } else { /* The following equations are from the OpenGL 3.2 specification: * @@ -113,9 +113,9 @@ vec4_vs_visitor::emit_prolog() /* 1 / (2^b - 1) for b=<10,10,10,2> */ normalize_factor = dst_reg(this, glsl_type::vec4_type); emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ), - src_reg(1.0f / ((1<<10) - 1)))); + brw_imm_f(1.0f / ((1<<10) - 1)))); emit(MOV(writemask(normalize_factor, WRITEMASK_W), - src_reg(1.0f / ((1<<2) - 1)))); + brw_imm_f(1.0f / ((1<<2) - 1)))); } dst_reg dst = reg; @@ -124,8 +124,8 @@ vec4_vs_visitor::emit_prolog() /* For signed normalization, we want the numerator to be 2c+1. */ if (wa_flags & BRW_ATTRIB_WA_SIGN) { - emit(MUL(dst, src_reg(dst), src_reg(2.0f))); - emit(ADD(dst, src_reg(dst), src_reg(1.0f))); + emit(MUL(dst, src_reg(dst), brw_imm_f(2.0f))); + emit(ADD(dst, src_reg(dst), brw_imm_f(1.0f))); } emit(MUL(dst, src_reg(dst), src_reg(normalize_factor))); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 967448e0e41..7c783f66864 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -48,6 +48,7 @@ brw_codegen_vs_prog(struct brw_context *brw, struct brw_vertex_program *vp, struct brw_vs_prog_key *key) { + const struct brw_compiler *compiler = brw->intelScreen->compiler; GLuint program_size; const GLuint *program; struct brw_vs_prog_data prog_data; @@ -79,7 +80,7 @@ brw_codegen_vs_prog(struct brw_context *brw, * by the state cache. */ int param_count = vp->program.Base.nir->num_uniforms; - if (!brw->intelScreen->compiler->scalar_vs) + if (!compiler->scalar_stage[MESA_SHADER_VERTEX]) param_count *= 4; if (vs) @@ -102,7 +103,7 @@ brw_codegen_vs_prog(struct brw_context *brw, if (prog) { brw_nir_setup_glsl_uniforms(vp->program.Base.nir, prog, &vp->program.Base, &prog_data.base.base, - brw->intelScreen->compiler->scalar_vs); + compiler->scalar_stage[MESA_SHADER_VERTEX]); } else { brw_nir_setup_arb_uniforms(vp->program.Base.nir, &vp->program.Base, &prog_data.base.base); @@ -173,7 +174,7 @@ brw_codegen_vs_prog(struct brw_context *brw, /* Emit GEN4 code. */ char *error_str; - program = brw_compile_vs(brw->intelScreen->compiler, brw, mem_ctx, key, + program = brw_compile_vs(compiler, brw, mem_ctx, key, &prog_data, vp->program.Base.nir, brw_select_clip_planes(&brw->ctx), !_mesa_is_gles3(&brw->ctx), diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index 2fef188c17e..3840ce0fe57 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -65,7 +65,7 @@ gen6_gs_visitor::emit_prolog() (prog_data->vue_map.num_slots + 1) * nir->info.gs.vertices_out); this->vertex_output_offset = src_reg(this, glsl_type::uint_type); - emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u))); + emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u))); /* MRF 1 will be the header for all messages (FF_SYNC and URB_WRITES), * so initialize it once to R0. @@ -87,13 +87,13 @@ gen6_gs_visitor::emit_prolog() * headers. */ this->first_vertex = src_reg(this, glsl_type::uint_type); - emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START)); + emit(MOV(dst_reg(this->first_vertex), brw_imm_ud(URB_WRITE_PRIM_START))); /* The FF_SYNC message requires to know the number of primitives generated, * so keep a counter for this. */ this->prim_count = src_reg(this, glsl_type::uint_type); - emit(MOV(dst_reg(this->prim_count), 0u)); + emit(MOV(dst_reg(this->prim_count), brw_imm_ud(0u))); if (gs_prog_data->gen6_xfb_enabled) { /* Create a virtual register to hold destination indices in SOL */ @@ -170,7 +170,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id) } emit(ADD(dst_reg(this->vertex_output_offset), - this->vertex_output_offset, 1u)); + this->vertex_output_offset, brw_imm_ud(1u))); } /* Now buffer flags for this vertex */ @@ -181,9 +181,9 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id) /* If we are outputting points, then every vertex has PrimStart and * PrimEnd set. */ - emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) | - URB_WRITE_PRIM_START | URB_WRITE_PRIM_END)); - emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u)); + emit(MOV(dst, brw_imm_d((_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) | + URB_WRITE_PRIM_START | URB_WRITE_PRIM_END))); + emit(ADD(dst_reg(this->prim_count), this->prim_count, brw_imm_ud(1u))); } else { /* Otherwise, we can only set the PrimStart flag, which we have stored * in the first_vertex register. We will have to wait until we execute @@ -191,11 +191,12 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id) * vertex. */ emit(OR(dst, this->first_vertex, - (gs_prog_data->output_topology << URB_WRITE_PRIM_TYPE_SHIFT))); - emit(MOV(dst_reg(this->first_vertex), 0u)); + brw_imm_ud(gs_prog_data->output_topology << + URB_WRITE_PRIM_TYPE_SHIFT))); + emit(MOV(dst_reg(this->first_vertex), brw_imm_ud(0u))); } emit(ADD(dst_reg(this->vertex_output_offset), - this->vertex_output_offset, 1u)); + this->vertex_output_offset, brw_imm_ud(1u))); } void @@ -218,10 +219,10 @@ gen6_gs_visitor::gs_end_primitive() * below). */ unsigned num_output_vertices = nir->info.gs.vertices_out; - emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices + 1), - BRW_CONDITIONAL_L)); - vec4_instruction *inst = emit(CMP(dst_null_d(), - this->vertex_count, 0u, + emit(CMP(dst_null_ud(), this->vertex_count, + brw_imm_ud(num_output_vertices + 1), BRW_CONDITIONAL_L)); + vec4_instruction *inst = emit(CMP(dst_null_ud(), + this->vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_NEQ)); inst->predicate = BRW_PREDICATE_NORMAL; emit(IF(BRW_PREDICATE_NORMAL)); @@ -231,19 +232,19 @@ gen6_gs_visitor::gs_end_primitive() * vertex. */ src_reg offset(this, glsl_type::uint_type); - emit(ADD(dst_reg(offset), this->vertex_output_offset, src_reg(-1))); + emit(ADD(dst_reg(offset), this->vertex_output_offset, brw_imm_d(-1))); src_reg dst(this->vertex_output); dst.reladdr = ralloc(mem_ctx, src_reg); memcpy(dst.reladdr, &offset, sizeof(src_reg)); - emit(OR(dst_reg(dst), dst, URB_WRITE_PRIM_END)); - emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u)); + emit(OR(dst_reg(dst), dst, brw_imm_d(URB_WRITE_PRIM_END))); + emit(ADD(dst_reg(this->prim_count), this->prim_count, brw_imm_ud(1u))); /* Set the first vertex flag to indicate that the next vertex will start * a primitive. */ - emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START)); + emit(MOV(dst_reg(this->first_vertex), brw_imm_d(URB_WRITE_PRIM_START))); } emit(BRW_OPCODE_ENDIF); } @@ -262,7 +263,8 @@ gen6_gs_visitor::emit_urb_write_header(int mrf) */ src_reg flags_offset(this, glsl_type::uint_type); emit(ADD(dst_reg(flags_offset), - this->vertex_output_offset, src_reg(prog_data->vue_map.num_slots))); + this->vertex_output_offset, + brw_imm_d(prog_data->vue_map.num_slots))); src_reg flags_data(this->vertex_output); flags_data.reladdr = ralloc(mem_ctx, src_reg); @@ -321,7 +323,7 @@ gen6_gs_visitor::emit_thread_end() * points because in the point case we set PrimEnd on all vertices. */ if (nir->info.gs.output_primitive != GL_POINTS) { - emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z)); + emit(CMP(dst_null_ud(), this->first_vertex, brw_imm_ud(0u), BRW_CONDITIONAL_Z)); emit(IF(BRW_PREDICATE_NORMAL)); gs_end_primitive(); emit(BRW_OPCODE_ENDIF); @@ -347,7 +349,7 @@ gen6_gs_visitor::emit_thread_end() int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen); /* Issue the FF_SYNC message and obtain the initial VUE handle. */ - emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_G)); + emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_G)); emit(IF(BRW_PREDICATE_NORMAL)); { this->current_annotation = "gen6 thread end: ff_sync"; @@ -364,15 +366,15 @@ gen6_gs_visitor::emit_thread_end() dst_reg(this->temp), this->prim_count, this->svbi); } else { inst = emit(GS_OPCODE_FF_SYNC, - dst_reg(this->temp), this->prim_count, src_reg(0u)); + dst_reg(this->temp), this->prim_count, brw_imm_ud(0u)); } inst->base_mrf = base_mrf; /* Loop over all buffered vertices and emit URB write messages */ this->current_annotation = "gen6 thread end: urb writes init"; src_reg vertex(this, glsl_type::uint_type); - emit(MOV(dst_reg(vertex), 0u)); - emit(MOV(dst_reg(this->vertex_output_offset), 0u)); + emit(MOV(dst_reg(vertex), brw_imm_ud(0u))); + emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u))); this->current_annotation = "gen6 thread end: urb writes"; emit(BRW_OPCODE_DO); @@ -416,7 +418,7 @@ gen6_gs_visitor::emit_thread_end() mrf++; emit(ADD(dst_reg(this->vertex_output_offset), - this->vertex_output_offset, 1u)); + this->vertex_output_offset, brw_imm_ud(1u))); /* If this was max_usable_mrf, we can't fit anything more into * this URB WRITE. Same if we reached the max. message length. @@ -437,9 +439,9 @@ gen6_gs_visitor::emit_thread_end() * writing the next vertex. */ emit(ADD(dst_reg(this->vertex_output_offset), - this->vertex_output_offset, 1u)); + this->vertex_output_offset, brw_imm_ud(1u))); - emit(ADD(dst_reg(vertex), vertex, 1u)); + emit(ADD(dst_reg(vertex), vertex, brw_imm_ud(1u))); } emit(BRW_OPCODE_WHILE); @@ -468,8 +470,8 @@ gen6_gs_visitor::emit_thread_end() if (gs_prog_data->gen6_xfb_enabled) { /* When emitting EOT, set SONumPrimsWritten Increment Value. */ src_reg data(this, glsl_type::uint_type); - emit(AND(dst_reg(data), this->sol_prim_written, src_reg(0xffffu))); - emit(SHL(dst_reg(data), data, src_reg(16u))); + emit(AND(dst_reg(data), this->sol_prim_written, brw_imm_ud(0xffffu))); + emit(SHL(dst_reg(data), data, brw_imm_ud(16u))); emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data); } @@ -588,8 +590,8 @@ gen6_gs_visitor::xfb_write() this->current_annotation = "gen6 thread end: svb writes init"; - emit(MOV(dst_reg(this->vertex_output_offset), 0u)); - emit(MOV(dst_reg(this->sol_prim_written), 0u)); + emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u))); + emit(MOV(dst_reg(this->sol_prim_written), brw_imm_ud(0u))); /* Check that at least one primitive can be written * @@ -600,7 +602,7 @@ gen6_gs_visitor::xfb_write() * transform feedback is in interleaved or separate attribs mode. */ src_reg sol_temp(this, glsl_type::uvec4_type); - emit(ADD(dst_reg(sol_temp), this->svbi, src_reg(num_verts))); + emit(ADD(dst_reg(sol_temp), this->svbi, brw_imm_ud(num_verts))); /* Compare SVBI calculated number with the maximum value, which is * in R1.4 (previously saved in this->max_svbi) for gen6. @@ -623,7 +625,7 @@ gen6_gs_visitor::xfb_write() /* Write transform feedback data for all processed vertices. */ for (int i = 0; i < (int)nir->info.gs.vertices_out; i++) { - emit(MOV(dst_reg(sol_temp), i)); + emit(MOV(dst_reg(sol_temp), brw_imm_d(i))); emit(CMP(dst_null_d(), sol_temp, this->vertex_count, BRW_CONDITIONAL_L)); emit(IF(BRW_PREDICATE_NORMAL)); @@ -644,8 +646,8 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts) /* Check for buffer overflow: we need room to write the complete primitive * (all vertices). Otherwise, avoid writing any vertices for it */ - emit(ADD(dst_reg(sol_temp), this->sol_prim_written, 1u)); - emit(MUL(dst_reg(sol_temp), sol_temp, src_reg(num_verts))); + emit(ADD(dst_reg(sol_temp), this->sol_prim_written, brw_imm_ud(1u))); + emit(MUL(dst_reg(sol_temp), sol_temp, brw_imm_ud(num_verts))); emit(ADD(dst_reg(sol_temp), sol_temp, this->svbi)); emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE)); emit(IF(BRW_PREDICATE_NORMAL)); @@ -683,7 +685,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts) src_reg data(this->vertex_output); data.reladdr = ralloc(mem_ctx, src_reg); int offset = get_vertex_output_offset_for_varying(vertex, varying); - emit(MOV(dst_reg(this->vertex_output_offset), offset)); + emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_d(offset))); memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg)); data.type = output_reg[varying].type; @@ -710,9 +712,9 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts) */ emit(ADD(dst_reg(this->destination_indices), this->destination_indices, - src_reg(num_verts))); + brw_imm_ud(num_verts))); emit(ADD(dst_reg(this->sol_prim_written), - this->sol_prim_written, 1u)); + this->sol_prim_written, brw_imm_ud(1u))); } } diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c index 9f4a5db3592..d508c4c9278 100644 --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c @@ -136,8 +136,8 @@ emit_pipeline_stat(struct brw_context *brw, drm_intel_bo *bo, IA_VERTICES_COUNT, /* VERTICES_SUBMITTED */ IA_PRIMITIVES_COUNT, /* PRIMITIVES_SUBMITTED */ VS_INVOCATION_COUNT, /* VERTEX_SHADER_INVOCATIONS */ - 0, /* HS_INVOCATION_COUNT,*/ /* TESS_CONTROL_SHADER_PATCHES */ - 0, /* DS_INVOCATION_COUNT,*/ /* TESS_EVALUATION_SHADER_INVOCATIONS */ + HS_INVOCATION_COUNT, /* TESS_CONTROL_SHADER_PATCHES */ + DS_INVOCATION_COUNT, /* TESS_EVALUATION_SHADER_INVOCATIONS */ GS_PRIMITIVES_COUNT, /* GEOMETRY_SHADER_PRIMITIVES_EMITTED */ PS_INVOCATION_COUNT, /* FRAGMENT_SHADER_INVOCATIONS */ CS_INVOCATION_COUNT, /* COMPUTE_SHADER_INVOCATIONS */ @@ -231,6 +231,8 @@ gen6_queryobj_get_results(struct gl_context *ctx, case GL_CLIPPING_INPUT_PRIMITIVES_ARB: case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: case GL_COMPUTE_SHADER_INVOCATIONS_ARB: + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: query->Base.Result = results[1] - results[0]; break; @@ -250,8 +252,6 @@ gen6_queryobj_get_results(struct gl_context *ctx, query->Base.Result /= 4; break; - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: default: unreachable("Unrecognized query target in brw_queryobj_get_results()"); } @@ -329,11 +329,11 @@ gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q) case GL_CLIPPING_INPUT_PRIMITIVES_ARB: case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: case GL_COMPUTE_SHADER_INVOCATIONS_ARB: + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: emit_pipeline_stat(brw, query->bo, query->Base.Stream, query->Base.Target, 0); break; - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: default: unreachable("Unrecognized query target in brw_begin_query()"); } @@ -381,12 +381,12 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q) case GL_CLIPPING_INPUT_PRIMITIVES_ARB: case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: case GL_GEOMETRY_SHADER_INVOCATIONS: + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: emit_pipeline_stat(brw, query->bo, query->Base.Stream, query->Base.Target, 1); break; - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: default: unreachable("Unrecognized query target in brw_end_query()"); } diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index 69162171c4e..161de77e156 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -118,7 +118,7 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size, /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS): * - * A PIPE_CONTOL command with the CS Stall bit set must be programmed + * A PIPE_CONTROL command with the CS Stall bit set must be programmed * in the ring after this instruction. * * No such restriction exists for Haswell or Baytrail. diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 140a6544983..9cdd1c71b4d 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -187,7 +187,13 @@ gen8_emit_fast_clear_color(struct brw_context *brw, struct intel_mipmap_tree *mt, uint32_t *surf) { - surf[7] |= mt->fast_clear_color_value; + if (brw->gen >= 9) { + surf[12] = mt->gen9_fast_clear_color.ui[0]; + surf[13] = mt->gen9_fast_clear_color.ui[1]; + surf[14] = mt->gen9_fast_clear_color.ui[2]; + surf[15] = mt->gen9_fast_clear_color.ui[3]; + } else + surf[7] |= mt->fast_clear_color_value; } static void @@ -208,6 +214,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw, int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; unsigned tiling_mode, pitch; const unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode); + const uint32_t surf_type = translate_tex_target(target); if (mt->format == MESA_FORMAT_S_UINT8) { tiling_mode = GEN8_SURFACE_TILING_W; @@ -231,9 +238,14 @@ gen8_emit_texture_surface_state(struct brw_context *brw, */ if (brw->gen >= 9 || mt->num_samples == 1) assert(mt->halign == 16); + + if (brw->gen >= 9) { + assert(mt->num_samples > 1 || + brw_losslessly_compressible_format(brw, surf_type)); + } + } - const uint32_t surf_type = translate_tex_target(target); uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index); surf[0] = SET_FIELD(surf_type, BRW_SURFACE_TYPE) | diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index c00d2e786f3..f53c4ab518a 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -75,6 +75,10 @@ static const struct debug_control debug_control[] = { { "cs", DEBUG_CS }, { "hex", DEBUG_HEX }, { "nocompact", DEBUG_NO_COMPACTION }, + { "hs", DEBUG_TCS }, + { "tcs", DEBUG_TCS }, + { "ds", DEBUG_TES }, + { "tes", DEBUG_TES }, { NULL, 0 } }; @@ -83,8 +87,8 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage) { uint64_t flags[] = { [MESA_SHADER_VERTEX] = DEBUG_VS, - [MESA_SHADER_TESS_CTRL] = 0, - [MESA_SHADER_TESS_EVAL] = 0, + [MESA_SHADER_TESS_CTRL] = DEBUG_TCS, + [MESA_SHADER_TESS_EVAL] = DEBUG_TES, [MESA_SHADER_GEOMETRY] = DEBUG_GS, [MESA_SHADER_FRAGMENT] = DEBUG_WM, [MESA_SHADER_COMPUTE] = DEBUG_CS, diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h index 98bd7e93956..9c6030a6d7d 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.h +++ b/src/mesa/drivers/dri/i965/intel_debug.h @@ -69,6 +69,8 @@ extern uint64_t INTEL_DEBUG; #define DEBUG_CS (1ull << 33) #define DEBUG_HEX (1ull << 34) #define DEBUG_NO_COMPACTION (1ull << 35) +#define DEBUG_TCS (1ull << 36) +#define DEBUG_TES (1ull << 37) #ifdef HAVE_ANDROID_PLATFORM #define LOG_TAG "INTEL-MESA" diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 386b63c123d..2e2459c125b 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -333,6 +333,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_texture_compression_bptc = true; ctx->Extensions.ARB_texture_view = true; ctx->Extensions.ARB_shader_storage_buffer_object = true; + ctx->Extensions.EXT_shader_samples_identical = true; if (can_do_pipelined_register_writes(brw)) { ctx->Extensions.ARB_draw_indirect = true; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index b1a7632d82f..87e01366932 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -35,6 +35,7 @@ #include "brw_blorp.h" #include "brw_context.h" +#include "brw_state.h" #include "main/enums.h" #include "main/fbobject.h" @@ -192,6 +193,12 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling) * * - MCS buffer for non-MSRT is supported only for RT formats 32bpp, * 64bpp, and 128bpp. + * + * From the Skylake documentation, it is made clear that X-tiling is no longer + * supported: + * + * - MCS and Lossless compression is supported for TiledY/TileYs/TileYf + * non-MSRTs only. */ static bool intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, @@ -201,14 +208,6 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, if (brw->gen < 7) return false; - if (brw->gen >= 9) { - /* FINISHME: Enable singlesample fast MCS clears on SKL after all GPU - * FINISHME: hangs are resolved. - */ - perf_debug("singlesample fast MCS clears disabled on gen9"); - return false; - } - if (mt->disable_aux_buffers) return false; @@ -259,7 +258,11 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, if (!brw->format_supported_as_render_target[mt->format]) return false; - return true; + if (brw->gen >= 9) { + const uint32_t brw_format = brw_format_for_mesa_format(mt->format); + return brw_losslessly_compressible_format(brw, brw_format); + } else + return true; } @@ -1495,6 +1498,17 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height); unsigned width_divisor = block_width_px * 4; unsigned height_divisor = block_height * 8; + + /* The Skylake MCS is twice as tall as the Broadwell MCS. + * + * In pre-Skylake, each bit in the MCS contained the state of 2 cachelines + * in the main surface. In Skylake, it's two bits. The extra bit + * doubles the MCS height, not width, because in Skylake the MCS is always + * Y-tiled. + */ + if (brw->gen >= 9) + height_divisor /= 2; + unsigned mcs_width = ALIGN(mt->logical_width0, width_divisor) / width_divisor; unsigned mcs_height = diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 805cd714d88..64f73ea9ae5 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -633,15 +633,22 @@ struct intel_mipmap_tree * The SURFACE_STATE bits associated with the last fast color clear to this * color mipmap tree, if any. * - * This value will only ever contain ones in bits 28-31, so it is safe to - * OR into dword 7 of SURFACE_STATE. + * Prior to GEN9 there is a single bit for RGBA clear values which gives you + * the option of 2^4 clear colors. Each bit determines if the color channel + * is fully saturated or unsaturated (Cherryview does add a 32b value per + * channel, but it is globally applied instead of being part of the render + * surface state). Starting with GEN9, the surface state accepts a 32b value + * for each color channel. * * @see RENDER_SURFACE_STATE.RedClearColor * @see RENDER_SURFACE_STATE.GreenClearColor * @see RENDER_SURFACE_STATE.BlueClearColor * @see RENDER_SURFACE_STATE.AlphaClearColor */ - uint32_t fast_clear_color_value; + union { + uint32_t fast_clear_color_value; + union gl_color_union gen9_fast_clear_color; + }; /** * Disable allocation of auxiliary buffers, such as the HiZ buffer and MCS diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp index 62d39f70ec4..034d8a507fe 100644 --- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp @@ -107,7 +107,7 @@ TEST_F(cmod_propagation_test, basic) fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); + fs_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE); @@ -139,7 +139,7 @@ TEST_F(cmod_propagation_test, cmp_nonzero) fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - fs_reg nonzero(1.0f); + fs_reg nonzero(brw_imm_f(1.0f)); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), dest, nonzero, BRW_CONDITIONAL_GE); @@ -171,7 +171,7 @@ TEST_F(cmod_propagation_test, non_cmod_instruction) const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::uint_type); fs_reg src0 = v->vgrf(glsl_type::uint_type); - fs_reg zero(0u); + fs_reg zero(brw_imm_ud(0u)); bld.FBL(dest, src0); bld.CMP(bld.null_reg_ud(), dest, zero, BRW_CONDITIONAL_GE); @@ -205,7 +205,7 @@ TEST_F(cmod_propagation_test, intervening_flag_write) fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); + fs_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE); bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE); @@ -244,7 +244,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read) fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); + fs_reg zero(brw_imm_f(0.0f)); bld.ADD(dest0, src0, src1); set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE); @@ -282,7 +282,7 @@ TEST_F(cmod_propagation_test, intervening_dest_write) fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::vec2_type); - fs_reg zero(0.0f); + fs_reg zero(brw_imm_f(0.0f)); bld.ADD(offset(dest, bld, 2), src0, src1); bld.emit(SHADER_OPCODE_TEX, dest, src2) ->regs_written = 4; @@ -323,7 +323,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read_same_value) fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); + fs_reg zero(brw_imm_f(0.0f)); set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1)); set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE); @@ -360,7 +360,7 @@ TEST_F(cmod_propagation_test, negate) fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); + fs_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); dest.negate = true; bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE); @@ -425,7 +425,7 @@ TEST_F(cmod_propagation_test, different_types_cmod_with_zero) fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::int_type); fs_reg src1 = v->vgrf(glsl_type::int_type); - fs_reg zero(0.0f); + fs_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), retype(dest, BRW_REGISTER_TYPE_F), zero, BRW_CONDITIONAL_GE); @@ -458,8 +458,8 @@ TEST_F(cmod_propagation_test, andnz_one) const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); - fs_reg one(1); + fs_reg zero(brw_imm_f(0.0f)); + fs_reg one(brw_imm_d(1)); bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); set_condmod(BRW_CONDITIONAL_NZ, @@ -493,8 +493,8 @@ TEST_F(cmod_propagation_test, andnz_non_one) const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); - fs_reg nonone(38); + fs_reg zero(brw_imm_f(0.0f)); + fs_reg nonone(brw_imm_d(38)); bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); set_condmod(BRW_CONDITIONAL_NZ, @@ -528,8 +528,8 @@ TEST_F(cmod_propagation_test, andz_one) const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); - fs_reg one(1); + fs_reg zero(brw_imm_f(0.0f)); + fs_reg one(brw_imm_d(1)); bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); set_condmod(BRW_CONDITIONAL_Z, diff --git a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp index 9aa2fcc7907..e5e566c60bc 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp @@ -145,7 +145,7 @@ TEST_F(cmod_propagation_test, basic) dst_reg dest = dst_reg(v, glsl_type::float_type); src_reg src0 = src_reg(v, glsl_type::float_type); src_reg src1 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); dst_reg dest_null = bld.null_reg_f(); dest_null.writemask = WRITEMASK_X; @@ -181,7 +181,7 @@ TEST_F(cmod_propagation_test, basic_different_dst_writemask) dst_reg dest = dst_reg(v, glsl_type::float_type); src_reg src0 = src_reg(v, glsl_type::float_type); src_reg src1 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); dst_reg dest_null = bld.null_reg_f(); bld.ADD(dest, src0, src1); @@ -217,8 +217,8 @@ TEST_F(cmod_propagation_test, andz_one) const vec4_builder bld = vec4_builder(v).at_end(); dst_reg dest = dst_reg(v, glsl_type::int_type); src_reg src0 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); - src_reg one(1); + src_reg zero(brw_imm_f(0.0f)); + src_reg one(brw_imm_d(1)); bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); set_condmod(BRW_CONDITIONAL_Z, @@ -253,7 +253,7 @@ TEST_F(cmod_propagation_test, non_cmod_instruction) const vec4_builder bld = vec4_builder(v).at_end(); dst_reg dest = dst_reg(v, glsl_type::uint_type); src_reg src0 = src_reg(v, glsl_type::uint_type); - src_reg zero(0u); + src_reg zero(brw_imm_ud(0u)); bld.FBL(dest, src0); bld.CMP(bld.null_reg_ud(), src_reg(dest), zero, BRW_CONDITIONAL_GE); @@ -288,7 +288,7 @@ TEST_F(cmod_propagation_test, intervening_flag_write) src_reg src0 = src_reg(v, glsl_type::float_type); src_reg src1 = src_reg(v, glsl_type::float_type); src_reg src2 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE); bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_GE); @@ -328,7 +328,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read) src_reg src0 = src_reg(v, glsl_type::float_type); src_reg src1 = src_reg(v, glsl_type::float_type); src_reg src2 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); bld.ADD(dest0, src0, src1); set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); bld.CMP(bld.null_reg_f(), src_reg(dest0), zero, BRW_CONDITIONAL_GE); @@ -367,7 +367,7 @@ TEST_F(cmod_propagation_test, intervening_dest_write) src_reg src0 = src_reg(v, glsl_type::float_type); src_reg src1 = src_reg(v, glsl_type::float_type); src_reg src2 = src_reg(v, glsl_type::vec2_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); bld.ADD(offset(dest, 2), src0, src1); bld.emit(SHADER_OPCODE_TEX, dest, src2) ->regs_written = 4; @@ -409,7 +409,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read_same_value) src_reg src0 = src_reg(v, glsl_type::float_type); src_reg src1 = src_reg(v, glsl_type::float_type); src_reg src2 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); dst_reg dest_null = bld.null_reg_f(); dest_null.writemask = WRITEMASK_X; @@ -449,7 +449,7 @@ TEST_F(cmod_propagation_test, negate) dst_reg dest = dst_reg(v, glsl_type::float_type); src_reg src0 = src_reg(v, glsl_type::float_type); src_reg src1 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); src_reg tmp_src = src_reg(dest); tmp_src.negate = true; @@ -521,7 +521,7 @@ TEST_F(cmod_propagation_test, different_types_cmod_with_zero) dst_reg dest = dst_reg(v, glsl_type::int_type); src_reg src0 = src_reg(v, glsl_type::int_type); src_reg src1 = src_reg(v, glsl_type::int_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), retype(src_reg(dest), BRW_REGISTER_TYPE_F), zero, BRW_CONDITIONAL_GE); @@ -555,8 +555,8 @@ TEST_F(cmod_propagation_test, andnz_non_one) const vec4_builder bld = vec4_builder(v).at_end(); dst_reg dest = dst_reg(v, glsl_type::int_type); src_reg src0 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); - src_reg nonone(38); + src_reg zero(brw_imm_f(0.0f)); + src_reg nonone(brw_imm_d(38)); bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); set_condmod(BRW_CONDITIONAL_NZ, @@ -594,7 +594,7 @@ TEST_F(cmod_propagation_test, basic_vec4) dst_reg dest = dst_reg(v, glsl_type::vec4_type); src_reg src0 = src_reg(v, glsl_type::vec4_type); src_reg src1 = src_reg(v, glsl_type::vec4_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); bld.MUL(dest, src0, src1); bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_NZ); @@ -628,7 +628,7 @@ TEST_F(cmod_propagation_test, basic_vec4_different_dst_writemask) dest.writemask = WRITEMASK_X; src_reg src0 = src_reg(v, glsl_type::vec4_type); src_reg src1 = src_reg(v, glsl_type::vec4_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); dst_reg dest_null = bld.null_reg_f(); bld.MUL(dest, src0, src1); @@ -668,7 +668,7 @@ TEST_F(cmod_propagation_test, mad_one_component_vec4) src_reg src2 = src_reg(v, glsl_type::vec4_type); src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX; src2.negate = true; - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); src_reg tmp(dest); tmp.swizzle = BRW_SWIZZLE_XXXX; dst_reg dest_null = bld.null_reg_f(); @@ -710,7 +710,7 @@ TEST_F(cmod_propagation_test, mad_more_one_component_vec4) src_reg src2 = src_reg(v, glsl_type::vec4_type); src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX; src2.negate = true; - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); src_reg tmp(dest); tmp.swizzle = BRW_SWIZZLE_XXXX; dst_reg dest_null = bld.null_reg_f(); @@ -751,7 +751,7 @@ TEST_F(cmod_propagation_test, cmp_mov_vec4) src_reg src0 = src_reg(v, glsl_type::ivec4_type); src0.swizzle = BRW_SWIZZLE_XXXX; src0.file = UNIFORM; - src_reg nonone = retype(src_reg(16), BRW_REGISTER_TYPE_D); + src_reg nonone = retype(brw_imm_d(16), BRW_REGISTER_TYPE_D); src_reg mov_src = src_reg(dest); mov_src.swizzle = BRW_SWIZZLE_XXXX; dst_reg dest_null = bld.null_reg_d(); @@ -790,7 +790,7 @@ TEST_F(cmod_propagation_test, mul_cmp_different_channels_vec4) dst_reg dest = dst_reg(v, glsl_type::vec4_type); src_reg src0 = src_reg(v, glsl_type::vec4_type); src_reg src1 = src_reg(v, glsl_type::vec4_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); src_reg cmp_src = src_reg(dest); cmp_src.swizzle = BRW_SWIZZLE4(0,1,3,2); diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp index a1f91d9c56a..ede409b6919 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp @@ -162,7 +162,7 @@ TEST_F(copy_propagation_test, test_swizzle_writemask) SWIZZLE_X, SWIZZLE_Z)))); - v->emit(v->MOV(writemask(a, WRITEMASK_XYZ), src_reg(1.0f))); + v->emit(v->MOV(writemask(a, WRITEMASK_XYZ), brw_imm_f(1.0f))); vec4_instruction *test_mov = v->MOV(c, swizzle(src_reg(b), BRW_SWIZZLE4(SWIZZLE_W, diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp index d84e2e98ec0..90a6bc3618f 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp @@ -135,7 +135,7 @@ TEST_F(register_coalesce_test, test_compute_to_mrf) m0.writemask = WRITEMASK_X; m0.type = BRW_REGISTER_TYPE_F; - vec4_instruction *mul = v->emit(v->MUL(temp, something, src_reg(1.0f))); + vec4_instruction *mul = v->emit(v->MUL(temp, something, brw_imm_f(1.0f))); v->emit(v->MOV(m0, src_reg(temp))); register_coalesce(v); @@ -159,7 +159,7 @@ TEST_F(register_coalesce_test, test_multiple_use) m1.type = BRW_REGISTER_TYPE_F; src_reg src = src_reg(temp); - vec4_instruction *mul = v->emit(v->MUL(temp, something, src_reg(1.0f))); + vec4_instruction *mul = v->emit(v->MUL(temp, something, brw_imm_f(1.0f))); src.swizzle = BRW_SWIZZLE_XXXX; v->emit(v->MOV(m0, src)); src.swizzle = BRW_SWIZZLE_XYZW; diff --git a/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp b/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp index 6a8bceabf16..7af97d0d097 100644 --- a/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp +++ b/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp @@ -40,15 +40,10 @@ void vf_float_conversion_test::SetUp() { int ebits = (vf >> 4) & 0x7; int mbits = vf & 0xf; - int e = ebits - 3; + float x = 1.0f + mbits / 16.0f; + int exp = ebits - 3; - float value = 1.0f; - - value += mbits / 16.0f; - - value *= exp2f(e); - - vf_to_float[vf] = value; + vf_to_float[vf] = ldexpf(x, exp); } } @@ -98,3 +93,18 @@ TEST_F(vf_float_conversion_test, test_special_case_0) EXPECT_EQ(f2u(brw_vf_to_float(brw_float_to_vf(+0.0f))), f2u(+0.0f)); EXPECT_EQ(f2u(brw_vf_to_float(brw_float_to_vf(-0.0f))), f2u(-0.0f)); } + +TEST_F(vf_float_conversion_test, test_nonrepresentable_float_input) +{ + EXPECT_EQ(brw_float_to_vf(+32.0f), -1); + EXPECT_EQ(brw_float_to_vf(-32.0f), -1); + + EXPECT_EQ(brw_float_to_vf(+16.5f), -1); + EXPECT_EQ(brw_float_to_vf(-16.5f), -1); + + EXPECT_EQ(brw_float_to_vf(+8.25f), -1); + EXPECT_EQ(brw_float_to_vf(-8.25f), -1); + + EXPECT_EQ(brw_float_to_vf(+4.125f), -1); + EXPECT_EQ(brw_float_to_vf(-4.125f), -1); +} diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h index a8c31b741ed..14f5e71fadf 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.h +++ b/src/mesa/drivers/dri/r200/r200_tex.h @@ -63,7 +63,9 @@ static const struct tx_table tx_table_be[] = [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_B8G8R8X8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 }, [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_X8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 }, [ MESA_FORMAT_BGR_UNORM8 ] = { 0xffffffff, 0 }, [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, @@ -91,7 +93,9 @@ static const struct tx_table tx_table_le[] = [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_B8G8R8X8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 }, [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_X8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 }, [ MESA_FORMAT_BGR_UNORM8 ] = { R200_TXFORMAT_ARGB8888, 0 }, [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h index f8ec432755a..37c2fa0dc2f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.h +++ b/src/mesa/drivers/dri/radeon/radeon_tex.h @@ -63,6 +63,8 @@ static const struct tx_table tx_table[] = [ MESA_FORMAT_R8G8B8A8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, [ MESA_FORMAT_B8G8R8A8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, [ MESA_FORMAT_A8R8G8B8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_B8G8R8X8_UNORM ] = { RADEON_TXFORMAT_ARGB8888, 0 }, + [ MESA_FORMAT_X8R8G8B8_UNORM ] = { RADEON_TXFORMAT_ARGB8888, 0 }, [ MESA_FORMAT_BGR_UNORM8 ] = { RADEON_TXFORMAT_ARGB8888, 0 }, [ MESA_FORMAT_B5G6R5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 }, [ MESA_FORMAT_R5G6B5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 }, diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c index ddf7f497f1e..2ae22e9e691 100644 --- a/src/mesa/main/blend.c +++ b/src/mesa/main/blend.c @@ -67,7 +67,7 @@ legal_src_factor(const struct gl_context *ctx, GLenum factor) case GL_SRC1_ALPHA: case GL_ONE_MINUS_SRC1_COLOR: case GL_ONE_MINUS_SRC1_ALPHA: - return _mesa_is_desktop_gl(ctx) + return ctx->API != API_OPENGLES && ctx->Extensions.ARB_blend_func_extended; default: return GL_FALSE; @@ -100,14 +100,14 @@ legal_dst_factor(const struct gl_context *ctx, GLenum factor) case GL_ONE_MINUS_CONSTANT_ALPHA: return _mesa_is_desktop_gl(ctx) || ctx->API == API_OPENGLES2; case GL_SRC_ALPHA_SATURATE: - return (_mesa_is_desktop_gl(ctx) + return (ctx->API != API_OPENGLES && ctx->Extensions.ARB_blend_func_extended) || _mesa_is_gles3(ctx); case GL_SRC1_COLOR: case GL_SRC1_ALPHA: case GL_ONE_MINUS_SRC1_COLOR: case GL_ONE_MINUS_SRC1_ALPHA: - return _mesa_is_desktop_gl(ctx) + return ctx->API != API_OPENGLES && ctx->Extensions.ARB_blend_func_extended; default: return GL_FALSE; @@ -404,7 +404,7 @@ _mesa_BlendEquation( GLenum mode ) ctx->Color._BlendEquationPerBuffer = GL_FALSE; if (ctx->Driver.BlendEquationSeparate) - (*ctx->Driver.BlendEquationSeparate)( ctx, mode, mode ); + ctx->Driver.BlendEquationSeparate(ctx, mode, mode); } @@ -582,7 +582,7 @@ _mesa_BlendColor( GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha ) ctx->Color.BlendColor[3] = CLAMP(tmp[3], 0.0F, 1.0F); if (ctx->Driver.BlendColor) - (*ctx->Driver.BlendColor)(ctx, ctx->Color.BlendColor); + ctx->Driver.BlendColor(ctx, ctx->Color.BlendColor); } diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 93588a2ee18..83e238ae825 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -731,7 +731,7 @@ _mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, /* Call the device driver function only if fb is the bound read buffer */ if (fb == ctx->ReadBuffer) { if (ctx->Driver.ReadBuffer) - (*ctx->Driver.ReadBuffer)(ctx, buffer); + ctx->Driver.ReadBuffer(ctx, buffer); } } diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index 42f67990784..a8a667e3c12 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -369,10 +369,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) break; case GL_DEBUG_OUTPUT: case GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB: - if (!_mesa_is_desktop_gl(ctx)) - goto invalid_enum_error; - else - _mesa_set_debug_state_int(ctx, cap, state); + _mesa_set_debug_state_int(ctx, cap, state); break; case GL_DITHER: if (ctx->Color.DitherFlag == state) @@ -1225,10 +1222,7 @@ _mesa_IsEnabled( GLenum cap ) return ctx->Polygon.CullFlag; case GL_DEBUG_OUTPUT: case GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB: - if (!_mesa_is_desktop_gl(ctx)) - goto invalid_enum_error; - else - return (GLboolean) _mesa_get_debug_state_int(ctx, cap); + return (GLboolean) _mesa_get_debug_state_int(ctx, cap); case GL_DEPTH_TEST: return ctx->Depth.Test; case GL_DITHER: diff --git a/src/mesa/main/errors.c b/src/mesa/main/errors.c index f720de316e4..366b119aba3 100644 --- a/src/mesa/main/errors.c +++ b/src/mesa/main/errors.c @@ -978,9 +978,13 @@ _mesa_DebugMessageInsert(GLenum source, GLenum type, GLuint id, GLenum severity, GLint length, const GLchar *buf) { - const char *callerstr = "glDebugMessageInsert"; - GET_CURRENT_CONTEXT(ctx); + const char *callerstr; + + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glDebugMessageInsert"; + else + callerstr = "glDebugMessageInsertKHR"; if (!validate_params(ctx, INSERT, callerstr, source, type, severity)) return; /* GL_INVALID_ENUM */ @@ -1004,15 +1008,21 @@ _mesa_GetDebugMessageLog(GLuint count, GLsizei logSize, GLenum *sources, { GET_CURRENT_CONTEXT(ctx); struct gl_debug_state *debug; + const char *callerstr; GLuint ret; + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glGetDebugMessageLog"; + else + callerstr = "glGetDebugMessageLogKHR"; + if (!messageLog) logSize = 0; if (logSize < 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "glGetDebugMessageLog(logSize=%d : logSize must not be" - " negative)", logSize); + "%s(logSize=%d : logSize must not be negative)", + callerstr, logSize); return 0; } @@ -1066,9 +1076,14 @@ _mesa_DebugMessageControl(GLenum gl_source, GLenum gl_type, enum mesa_debug_source source = gl_enum_to_debug_source(gl_source); enum mesa_debug_type type = gl_enum_to_debug_type(gl_type); enum mesa_debug_severity severity = gl_enum_to_debug_severity(gl_severity); - const char *callerstr = "glDebugMessageControl"; + const char *callerstr; struct gl_debug_state *debug; + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glDebugMessageControl"; + else + callerstr = "glDebugMessageControlKHR"; + if (count < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(count=%d : count must not be negative)", callerstr, @@ -1124,10 +1139,15 @@ _mesa_PushDebugGroup(GLenum source, GLuint id, GLsizei length, const GLchar *message) { GET_CURRENT_CONTEXT(ctx); - const char *callerstr = "glPushDebugGroup"; + const char *callerstr; struct gl_debug_state *debug; struct gl_debug_message *emptySlot; + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glPushDebugGroup"; + else + callerstr = "glPushDebugGroupKHR"; + switch(source) { case GL_DEBUG_SOURCE_APPLICATION: case GL_DEBUG_SOURCE_THIRD_PARTY: @@ -1176,10 +1196,15 @@ void GLAPIENTRY _mesa_PopDebugGroup(void) { GET_CURRENT_CONTEXT(ctx); - const char *callerstr = "glPopDebugGroup"; + const char *callerstr; struct gl_debug_state *debug; struct gl_debug_message *gdmessage, msg; + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glPopDebugGroup"; + else + callerstr = "glPopDebugGroupKHR"; + debug = _mesa_lock_debug_state(ctx); if (!debug) return; diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index e94d2b74749..fa50cb68cca 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -40,7 +40,6 @@ struct gl_extensions _mesa_extension_override_enables; struct gl_extensions _mesa_extension_override_disables; static char *extra_extensions = NULL; -static char *cant_disable_extensions = NULL; /** @@ -68,29 +67,30 @@ const struct mesa_extension _mesa_extension_table[] = { #undef EXT }; +static bool disabled_extensions[ARRAY_SIZE(_mesa_extension_table)]; /** * Given an extension name, lookup up the corresponding member of struct - * gl_extensions and return that member's offset (in bytes). If the name is - * not found in the \c _mesa_extension_table, return 0. + * gl_extensions and return that member's index. If the name is + * not found in the \c _mesa_extension_table, return -1. * * \param name Name of extension. - * \return Offset of member in struct gl_extensions. + * \return Index of member in struct gl_extensions. */ -static size_t -name_to_offset(const char* name) +static int +name_to_index(const char* name) { unsigned i; if (name == 0) - return 0; + return -1; for (i = 0; i < ARRAY_SIZE(_mesa_extension_table); ++i) { if (strcmp(name, _mesa_extension_table[i].name) == 0) - return _mesa_extension_table[i].offset; + return i; } - return 0; + return -1; } /** @@ -206,11 +206,11 @@ _mesa_enable_sw_extensions(struct gl_context *ctx) * \return offset of extensions withint `ext' or 0 if extension is not known */ static size_t -set_extension(struct gl_extensions *ext, const char *name, GLboolean state) +set_extension(struct gl_extensions *ext, int i, GLboolean state) { size_t offset; - offset = name_to_offset(name); + offset = i < 0 ? 0 : _mesa_extension_table[i].offset; if (offset != 0 && (offset != o(dummy_true) || state != GL_FALSE)) { ((GLboolean *) ext)[offset] = state; } @@ -240,12 +240,6 @@ get_extension_override( struct gl_context *ctx ) { override_extensions_in_context(ctx); - if (cant_disable_extensions != NULL) { - _mesa_problem(ctx, - "Trying to disable permanently enabled extensions: %s", - cant_disable_extensions); - } - if (extra_extensions == NULL) { return calloc(1, sizeof(char)); } else { @@ -257,7 +251,7 @@ get_extension_override( struct gl_context *ctx ) /** - * \brief Free extra_extensions and cant_disable_extensions strings + * \brief Free extra_extensions string * * These strings are allocated early during the first context creation by * _mesa_one_time_init_extension_overrides. @@ -266,7 +260,6 @@ static void free_unknown_extensions_strings(void) { free(extra_extensions); - free(cant_disable_extensions); } @@ -295,21 +288,20 @@ _mesa_one_time_init_extension_overrides(void) /* extra_exts: List of unrecognized extensions. */ extra_extensions = calloc(ALIGN(strlen(env_const) + 2, 4), sizeof(char)); - cant_disable_extensions = calloc(ALIGN(strlen(env_const) + 2, 4), sizeof(char)); /* Copy env_const because strtok() is destructive. */ env = strdup(env_const); - if (env == NULL || extra_extensions == NULL || - cant_disable_extensions == NULL) { - free(env); - free(extra_extensions); - free(cant_disable_extensions); - return; + if (env == NULL || + extra_extensions == NULL) { + free(env); + free(extra_extensions); + return; } for (ext = strtok(env, " "); ext != NULL; ext = strtok(NULL, " ")) { int enable; + int i; bool recognized; switch (ext[0]) { case '+': @@ -325,7 +317,8 @@ _mesa_one_time_init_extension_overrides(void) break; } - offset = set_extension(&_mesa_extension_override_enables, ext, enable); + i = name_to_index(ext); + offset = set_extension(&_mesa_extension_override_enables, i, enable); if (offset != 0 && (offset != o(dummy_true) || enable != GL_FALSE)) { ((GLboolean *) &_mesa_extension_override_disables)[offset] = !enable; recognized = true; @@ -333,14 +326,12 @@ _mesa_one_time_init_extension_overrides(void) recognized = false; } - if (!recognized) { - if (enable) { - strcat(extra_extensions, ext); - strcat(extra_extensions, " "); - } else if (offset == o(dummy_true)) { - strcat(cant_disable_extensions, ext); - strcat(cant_disable_extensions, " "); - } + if (i >= 0) + disabled_extensions[i] = !enable; + + if (!recognized && enable) { + strcat(extra_extensions, ext); + strcat(extra_extensions, " "); } } @@ -354,13 +345,6 @@ _mesa_one_time_init_extension_overrides(void) } else if (extra_extensions[len - 1] == ' ') { extra_extensions[len - 1] = '\0'; } - len = strlen(cant_disable_extensions); - if (len == 0) { - free(cant_disable_extensions); - cant_disable_extensions = NULL; - } else if (cant_disable_extensions[len - 1] == ' ') { - cant_disable_extensions[len - 1] = '\0'; - } } @@ -401,7 +385,8 @@ _mesa_extension_supported(const struct gl_context *ctx, extension_index i) const bool *base = (bool *) &ctx->Extensions; const struct mesa_extension *ext = _mesa_extension_table + i; - return (ctx->Version >= ext->version[ctx->API]) && base[ext->offset]; + return !disabled_extensions[i] && + (ctx->Version >= ext->version[ctx->API]) && base[ext->offset]; } /** diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index d12fd9f1c8d..051d69a3613 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -1,8 +1,31 @@ +/* The extension table is alphabetically sorted by the extension name string column. */ + #define GLL 0 #define GLC 0 #define ES1 0 #define ES2 0 #define x ~0 + +EXT(3DFX_texture_compression_FXT1 , TDFX_texture_compression_FXT1 , GLL, GLC, x , x , 1999) + +EXT(AMD_conservative_depth , ARB_conservative_depth , GLL, GLC, x , x , 2009) +EXT(AMD_draw_buffers_blend , ARB_draw_buffers_blend , GLL, GLC, x , x , 2009) +EXT(AMD_performance_monitor , AMD_performance_monitor , GLL, GLC, x , x , 2007) +EXT(AMD_pinned_memory , AMD_pinned_memory , GLL, GLC, x , x , 2013) +EXT(AMD_seamless_cubemap_per_texture , AMD_seamless_cubemap_per_texture , GLL, GLC, x , x , 2009) +EXT(AMD_shader_stencil_export , ARB_shader_stencil_export , GLL, GLC, x , x , 2009) +EXT(AMD_shader_trinary_minmax , dummy_true , GLL, GLC, x , x , 2012) +EXT(AMD_vertex_shader_layer , AMD_vertex_shader_layer , x , GLC, x , x , 2012) +EXT(AMD_vertex_shader_viewport_index , AMD_vertex_shader_viewport_index , x , GLC, x , x , 2012) + +EXT(ANGLE_texture_compression_dxt3 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2011) +EXT(ANGLE_texture_compression_dxt5 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2011) + +EXT(APPLE_object_purgeable , APPLE_object_purgeable , GLL, GLC, x , x , 2006) +EXT(APPLE_packed_pixels , dummy_true , GLL, x , x , x , 2002) +EXT(APPLE_texture_max_level , dummy_true , x , x , ES1, ES2, 2009) +EXT(APPLE_vertex_array_object , dummy_true , GLL, x , x , x , 2002) + EXT(ARB_ES2_compatibility , ARB_ES2_compatibility , GLL, GLC, x , x , 2009) EXT(ARB_ES3_compatibility , ARB_ES3_compatibility , GLL, GLC, x , x , 2012) EXT(ARB_arrays_of_arrays , ARB_arrays_of_arrays , GLL, GLC, x , x , 2012) @@ -16,9 +39,9 @@ EXT(ARB_color_buffer_float , ARB_color_buffer_float EXT(ARB_compressed_texture_pixel_storage , dummy_true , GLL, GLC, x , x , 2011) EXT(ARB_compute_shader , ARB_compute_shader , GLL, GLC, x , x , 2012) EXT(ARB_conditional_render_inverted , ARB_conditional_render_inverted , GLL, GLC, x , x , 2014) +EXT(ARB_conservative_depth , ARB_conservative_depth , GLL, GLC, x , x , 2011) EXT(ARB_copy_buffer , dummy_true , GLL, GLC, x , x , 2008) EXT(ARB_copy_image , ARB_copy_image , GLL, GLC, x , x , 2012) -EXT(ARB_conservative_depth , ARB_conservative_depth , GLL, GLC, x , x , 2011) EXT(ARB_debug_output , dummy_true , GLL, GLC, x , x , 2009) EXT(ARB_depth_buffer_float , ARB_depth_buffer_float , GLL, GLC, x , x , 2008) EXT(ARB_depth_clamp , ARB_depth_clamp , GLL, GLC, x , x , 2003) @@ -56,8 +79,8 @@ EXT(ARB_multi_bind , dummy_true EXT(ARB_multi_draw_indirect , ARB_draw_indirect , x , GLC, x , x , 2012) EXT(ARB_multisample , dummy_true , GLL, x , x , x , 1994) EXT(ARB_multitexture , dummy_true , GLL, x , x , x , 1998) -EXT(ARB_occlusion_query2 , ARB_occlusion_query2 , GLL, GLC, x , x , 2003) EXT(ARB_occlusion_query , ARB_occlusion_query , GLL, x , x , x , 2001) +EXT(ARB_occlusion_query2 , ARB_occlusion_query2 , GLL, GLC, x , x , 2003) EXT(ARB_pipeline_statistics_query , ARB_pipeline_statistics_query , GLL, GLC, x , x , 2014) EXT(ARB_pixel_buffer_object , EXT_pixel_buffer_object , GLL, GLC, x , x , 2004) EXT(ARB_point_parameters , EXT_point_parameters , GLL, x , x , x , 1997) @@ -83,13 +106,13 @@ EXT(ARB_shader_subroutine , ARB_shader_subroutine EXT(ARB_shader_texture_image_samples , ARB_shader_texture_image_samples , GLL, GLC, x , x , 2014) EXT(ARB_shader_texture_lod , ARB_shader_texture_lod , GLL, GLC, x , x , 2009) EXT(ARB_shading_language_100 , dummy_true , GLL, x , x , x , 2003) -EXT(ARB_shading_language_packing , ARB_shading_language_packing , GLL, GLC, x , x , 2011) EXT(ARB_shading_language_420pack , ARB_shading_language_420pack , GLL, GLC, x , x , 2011) +EXT(ARB_shading_language_packing , ARB_shading_language_packing , GLL, GLC, x , x , 2011) EXT(ARB_shadow , ARB_shadow , GLL, x , x , x , 2001) EXT(ARB_stencil_texturing , ARB_stencil_texturing , GLL, GLC, x , x , 2012) EXT(ARB_sync , ARB_sync , GLL, GLC, x , x , 2003) -EXT(ARB_texture_barrier , NV_texture_barrier , GLL, GLC, x , x , 2014) EXT(ARB_tessellation_shader , ARB_tessellation_shader , x , GLC, x , x , 2009) +EXT(ARB_texture_barrier , NV_texture_barrier , GLL, GLC, x , x , 2014) EXT(ARB_texture_border_clamp , ARB_texture_border_clamp , GLL, x , x , x , 2000) EXT(ARB_texture_buffer_object , ARB_texture_buffer_object , x , GLC, x , x , 2008) EXT(ARB_texture_buffer_object_rgb32 , ARB_texture_buffer_object_rgb32 , x , GLC, x , x , 2009) @@ -105,20 +128,20 @@ EXT(ARB_texture_env_crossbar , ARB_texture_env_crossbar EXT(ARB_texture_env_dot3 , ARB_texture_env_dot3 , GLL, x , x , x , 2001) EXT(ARB_texture_float , ARB_texture_float , GLL, GLC, x , x , 2004) EXT(ARB_texture_gather , ARB_texture_gather , GLL, GLC, x , x , 2009) -EXT(ARB_texture_mirrored_repeat , dummy_true , GLL, x , x , x , 2001) EXT(ARB_texture_mirror_clamp_to_edge , ARB_texture_mirror_clamp_to_edge , GLL, GLC, x , x , 2013) +EXT(ARB_texture_mirrored_repeat , dummy_true , GLL, x , x , x , 2001) EXT(ARB_texture_multisample , ARB_texture_multisample , GLL, GLC, x , x , 2009) EXT(ARB_texture_non_power_of_two , ARB_texture_non_power_of_two , GLL, GLC, x , x , 2003) EXT(ARB_texture_query_levels , ARB_texture_query_levels , GLL, GLC, x , x , 2012) EXT(ARB_texture_query_lod , ARB_texture_query_lod , GLL, GLC, x , x , 2009) EXT(ARB_texture_rectangle , NV_texture_rectangle , GLL, GLC, x , x , 2004) -EXT(ARB_texture_rgb10_a2ui , ARB_texture_rgb10_a2ui , GLL, GLC, x , x , 2009) EXT(ARB_texture_rg , ARB_texture_rg , GLL, GLC, x , x , 2008) +EXT(ARB_texture_rgb10_a2ui , ARB_texture_rgb10_a2ui , GLL, GLC, x , x , 2009) EXT(ARB_texture_stencil8 , ARB_texture_stencil8 , GLL, GLC, x , x , 2013) EXT(ARB_texture_storage , dummy_true , GLL, GLC, x , x , 2011) EXT(ARB_texture_storage_multisample , ARB_texture_multisample , GLL, GLC, x , x , 2012) -EXT(ARB_texture_view , ARB_texture_view , GLL, GLC, x , x , 2012) EXT(ARB_texture_swizzle , EXT_texture_swizzle , GLL, GLC, x , x , 2008) +EXT(ARB_texture_view , ARB_texture_view , GLL, GLC, x , x , 2012) EXT(ARB_timer_query , ARB_timer_query , GLL, GLC, x , x , 2010) EXT(ARB_transform_feedback2 , ARB_transform_feedback2 , GLL, GLC, x , x , 2010) EXT(ARB_transform_feedback3 , ARB_transform_feedback3 , GLL, GLC, x , x , 2010) @@ -127,28 +150,39 @@ EXT(ARB_transpose_matrix , dummy_true EXT(ARB_uniform_buffer_object , ARB_uniform_buffer_object , GLL, GLC, x , x , 2009) EXT(ARB_vertex_array_bgra , EXT_vertex_array_bgra , GLL, GLC, x , x , 2008) EXT(ARB_vertex_array_object , dummy_true , GLL, GLC, x , x , 2006) +EXT(ARB_vertex_attrib_64bit , ARB_vertex_attrib_64bit , x , GLC, x , x , 2010) EXT(ARB_vertex_attrib_binding , dummy_true , GLL, GLC, x , x , 2012) EXT(ARB_vertex_buffer_object , dummy_true , GLL, x , x , x , 2003) EXT(ARB_vertex_program , ARB_vertex_program , GLL, x , x , x , 2002) EXT(ARB_vertex_shader , ARB_vertex_shader , GLL, GLC, x , x , 2002) -EXT(ARB_vertex_attrib_64bit , ARB_vertex_attrib_64bit , x , GLC, x , x , 2010) EXT(ARB_vertex_type_10f_11f_11f_rev , ARB_vertex_type_10f_11f_11f_rev , GLL, GLC, x , x , 2013) EXT(ARB_vertex_type_2_10_10_10_rev , ARB_vertex_type_2_10_10_10_rev , GLL, GLC, x , x , 2009) EXT(ARB_viewport_array , ARB_viewport_array , x , GLC, x , x , 2010) EXT(ARB_window_pos , dummy_true , GLL, x , x , x , 2001) +EXT(ATI_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003) +EXT(ATI_draw_buffers , dummy_true , GLL, x , x , x , 2002) +EXT(ATI_fragment_shader , ATI_fragment_shader , GLL, x , x , x , 2001) +EXT(ATI_separate_stencil , ATI_separate_stencil , GLL, x , x , x , 2006) +EXT(ATI_texture_compression_3dc , ATI_texture_compression_3dc , GLL, x , x , x , 2004) +EXT(ATI_texture_env_combine3 , ATI_texture_env_combine3 , GLL, x , x , x , 2002) +EXT(ATI_texture_float , ARB_texture_float , GLL, GLC, x , x , 2002) +EXT(ATI_texture_mirror_once , ATI_texture_mirror_once , GLL, GLC, x , x , 2006) + EXT(EXT_abgr , dummy_true , GLL, GLC, x , x , 1995) EXT(EXT_bgra , dummy_true , GLL, x , x , x , 1995) EXT(EXT_blend_color , EXT_blend_color , GLL, x , x , x , 1995) EXT(EXT_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003) +EXT(EXT_blend_func_extended , ARB_blend_func_extended , x , x , x , ES2, 2015) EXT(EXT_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999) -EXT(EXT_buffer_storage , ARB_buffer_storage , x , x , x , 31, 2015) -EXT(EXT_discard_framebuffer , dummy_true , x , x , ES1, ES2, 2009) EXT(EXT_blend_minmax , EXT_blend_minmax , GLL, x , ES1, ES2, 1995) EXT(EXT_blend_subtract , dummy_true , GLL, x , x , x , 1995) +EXT(EXT_buffer_storage , ARB_buffer_storage , x , x , x , 31, 2015) +EXT(EXT_color_buffer_float , dummy_true , x , x , ES1, 30, 2013) EXT(EXT_compiled_vertex_array , dummy_true , GLL, x , x , x , 1996) EXT(EXT_copy_texture , dummy_true , GLL, x , x , x , 1995) EXT(EXT_depth_bounds_test , EXT_depth_bounds_test , GLL, GLC, x , x , 2002) +EXT(EXT_discard_framebuffer , dummy_true , x , x , ES1, ES2, 2009) EXT(EXT_draw_buffers , dummy_true , x , x , x , ES2, 2012) EXT(EXT_draw_buffers2 , EXT_draw_buffers2 , GLL, GLC, x , x , 2006) EXT(EXT_draw_elements_base_vertex , ARB_draw_elements_base_vertex , x , x , x , ES2, 2014) @@ -172,20 +206,21 @@ EXT(EXT_point_parameters , EXT_point_parameters EXT(EXT_polygon_offset , dummy_true , GLL, x , x , x , 1995) EXT(EXT_polygon_offset_clamp , EXT_polygon_offset_clamp , GLL, GLC, x , x , 2014) EXT(EXT_provoking_vertex , EXT_provoking_vertex , GLL, GLC, x , x , 2009) +EXT(EXT_read_format_bgra , dummy_true , x , x , ES1, ES2, 2009) EXT(EXT_rescale_normal , dummy_true , GLL, x , x , x , 1997) EXT(EXT_secondary_color , dummy_true , GLL, x , x , x , 1999) EXT(EXT_separate_shader_objects , dummy_true , x , x , x , ES2, 2013) EXT(EXT_separate_specular_color , dummy_true , GLL, x , x , x , 1997) -EXT(EXT_shader_integer_mix , EXT_shader_integer_mix , GLL, GLC, ES1, 30, 2013) +EXT(EXT_shader_integer_mix , EXT_shader_integer_mix , GLL, GLC, x , 30, 2013) +EXT(EXT_shader_samples_identical , EXT_shader_samples_identical , GLL, GLC, x , 31, 2015) EXT(EXT_shadow_funcs , ARB_shadow , GLL, x , x , x , 2002) EXT(EXT_stencil_two_side , EXT_stencil_two_side , GLL, x , x , x , 2001) EXT(EXT_stencil_wrap , dummy_true , GLL, x , x , x , 2002) EXT(EXT_subtexture , dummy_true , GLL, x , x , x , 1995) +EXT(EXT_texture , dummy_true , GLL, x , x , x , 1996) EXT(EXT_texture3D , dummy_true , GLL, x , x , x , 1996) EXT(EXT_texture_array , EXT_texture_array , GLL, GLC, x , x , 2006) EXT(EXT_texture_compression_dxt1 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2004) -EXT(ANGLE_texture_compression_dxt3 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2011) -EXT(ANGLE_texture_compression_dxt5 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2011) EXT(EXT_texture_compression_latc , EXT_texture_compression_latc , GLL, x , x , x , 2006) EXT(EXT_texture_compression_rgtc , ARB_texture_compression_rgtc , GLL, GLC, x , x , 2004) EXT(EXT_texture_compression_s3tc , EXT_texture_compression_s3tc , GLL, GLC, x , x , 2000) @@ -196,28 +231,66 @@ EXT(EXT_texture_env_combine , dummy_true EXT(EXT_texture_env_dot3 , EXT_texture_env_dot3 , GLL, x , x , x , 2000) EXT(EXT_texture_filter_anisotropic , EXT_texture_filter_anisotropic , GLL, GLC, ES1, ES2, 1999) EXT(EXT_texture_format_BGRA8888 , dummy_true , x , x , ES1, ES2, 2005) -EXT(EXT_texture_rg , ARB_texture_rg , x , x , x , ES2, 2011) -EXT(EXT_read_format_bgra , dummy_true , x , x , ES1, ES2, 2009) EXT(EXT_texture_integer , EXT_texture_integer , GLL, GLC, x , x , 2006) EXT(EXT_texture_lod_bias , dummy_true , GLL, x , ES1, x , 1999) EXT(EXT_texture_mirror_clamp , EXT_texture_mirror_clamp , GLL, GLC, x , x , 2004) EXT(EXT_texture_object , dummy_true , GLL, x , x , x , 1995) -EXT(EXT_texture , dummy_true , GLL, x , x , x , 1996) EXT(EXT_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2004) -EXT(EXT_texture_shared_exponent , EXT_texture_shared_exponent , GLL, GLC, x , x , 2004) -EXT(EXT_texture_snorm , EXT_texture_snorm , GLL, GLC, x , x , 2009) +EXT(EXT_texture_rg , ARB_texture_rg , x , x , x , ES2, 2011) EXT(EXT_texture_sRGB , EXT_texture_sRGB , GLL, GLC, x , x , 2004) EXT(EXT_texture_sRGB_decode , EXT_texture_sRGB_decode , GLL, GLC, x , x , 2006) +EXT(EXT_texture_shared_exponent , EXT_texture_shared_exponent , GLL, GLC, x , x , 2004) +EXT(EXT_texture_snorm , EXT_texture_snorm , GLL, GLC, x , x , 2009) EXT(EXT_texture_swizzle , EXT_texture_swizzle , GLL, GLC, x , x , 2008) EXT(EXT_texture_type_2_10_10_10_REV , dummy_true , x , x , x , ES2, 2008) EXT(EXT_timer_query , EXT_timer_query , GLL, GLC, x , x , 2006) EXT(EXT_transform_feedback , EXT_transform_feedback , GLL, GLC, x , x , 2011) EXT(EXT_unpack_subimage , dummy_true , x , x , x , ES2, 2011) -EXT(EXT_vertex_array_bgra , EXT_vertex_array_bgra , GLL, GLC, x , x , 2008) EXT(EXT_vertex_array , dummy_true , GLL, x , x , x , 1995) -EXT(EXT_color_buffer_float , dummy_true , x , x , ES1, 30, 2013) +EXT(EXT_vertex_array_bgra , EXT_vertex_array_bgra , GLL, GLC, x , x , 2008) +EXT(IBM_multimode_draw_arrays , dummy_true , GLL, GLC, x , x , 1998) +EXT(IBM_rasterpos_clip , dummy_true , GLL, x , x , x , 1996) +EXT(IBM_texture_mirrored_repeat , dummy_true , GLL, x , x , x , 1998) +EXT(INGR_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999) + +EXT(INTEL_performance_query , INTEL_performance_query , GLL, GLC, x , ES2, 2013) + +EXT(KHR_context_flush_control , dummy_true , GLL, GLC, x , ES2, 2014) +EXT(KHR_debug , dummy_true , GLL, GLC, ES1, ES2, 2012) +EXT(KHR_texture_compression_astc_hdr , KHR_texture_compression_astc_hdr , GLL, GLC, x , ES2, 2012) +EXT(KHR_texture_compression_astc_ldr , KHR_texture_compression_astc_ldr , GLL, GLC, x , ES2, 2012) + +EXT(MESA_pack_invert , MESA_pack_invert , GLL, GLC, x , x , 2002) +EXT(MESA_texture_signed_rgba , EXT_texture_snorm , GLL, GLC, x , x , 2009) +EXT(MESA_window_pos , dummy_true , GLL, x , x , x , 2000) +EXT(MESA_ycbcr_texture , MESA_ycbcr_texture , GLL, GLC, x , x , 2002) + +EXT(NV_blend_square , dummy_true , GLL, x , x , x , 1999) +EXT(NV_conditional_render , NV_conditional_render , GLL, GLC, x , x , 2008) +EXT(NV_depth_clamp , ARB_depth_clamp , GLL, GLC, x , x , 2001) +EXT(NV_draw_buffers , dummy_true , x , x , x , ES2, 2011) +EXT(NV_fbo_color_attachments , dummy_true , x , x , x , ES2, 2010) +EXT(NV_fog_distance , NV_fog_distance , GLL, x , x , x , 2001) +EXT(NV_fragment_program_option , NV_fragment_program_option , GLL, x , x , x , 2005) +EXT(NV_light_max_exponent , dummy_true , GLL, x , x , x , 1999) +EXT(NV_packed_depth_stencil , dummy_true , GLL, GLC, x , x , 2000) +EXT(NV_point_sprite , NV_point_sprite , GLL, GLC, x , x , 2001) +EXT(NV_primitive_restart , NV_primitive_restart , GLL, x , x , x , 2002) +EXT(NV_read_buffer , dummy_true , x , x , x , ES2, 2011) +EXT(NV_read_depth , dummy_true , x , x , x , ES2, 2011) +EXT(NV_read_depth_stencil , dummy_true , x , x , x , ES2, 2011) +EXT(NV_read_stencil , dummy_true , x , x , x , ES2, 2011) +EXT(NV_texgen_reflection , dummy_true , GLL, x , x , x , 1999) +EXT(NV_texture_barrier , NV_texture_barrier , GLL, GLC, x , x , 2009) +EXT(NV_texture_env_combine4 , NV_texture_env_combine4 , GLL, x , x , x , 1999) +EXT(NV_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2000) +EXT(NV_vdpau_interop , NV_vdpau_interop , GLL, GLC, x , x , 2010) + +EXT(OES_EGL_image , OES_EGL_image , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */ +EXT(OES_EGL_image_external , OES_EGL_image_external , x , x , ES1, ES2, 2010) +EXT(OES_EGL_sync , dummy_true , x , x , ES1, ES2, 2010) EXT(OES_blend_equation_separate , EXT_blend_equation_separate , x , x , ES1, x , 2009) EXT(OES_blend_func_separate , EXT_blend_func_separate , x , x , ES1, x , 2009) EXT(OES_blend_subtract , dummy_true , x , x , ES1, x , 2009) @@ -230,9 +303,6 @@ EXT(OES_depth_texture , ARB_depth_texture EXT(OES_depth_texture_cube_map , OES_depth_texture_cube_map , x , x , x , ES2, 2012) EXT(OES_draw_elements_base_vertex , ARB_draw_elements_base_vertex , x , x , x , ES2, 2014) EXT(OES_draw_texture , OES_draw_texture , x , x , ES1, x , 2004) -EXT(OES_EGL_sync , dummy_true , x , x , ES1, ES2, 2010) -EXT(OES_EGL_image , OES_EGL_image , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */ -EXT(OES_EGL_image_external , OES_EGL_image_external , x , x , ES1, ES2, 2010) EXT(OES_element_index_uint , dummy_true , x , x , ES1, ES2, 2005) EXT(OES_fbo_render_mipmap , dummy_true , x , x , ES1, ES2, 2005) EXT(OES_fixed_point , dummy_true , x , x , ES1, x , 2002) @@ -260,73 +330,17 @@ EXT(OES_texture_float_linear , OES_texture_float_linear EXT(OES_texture_half_float , OES_texture_half_float , x , x , x , ES2, 2005) EXT(OES_texture_half_float_linear , OES_texture_half_float_linear , x , x , x , ES2, 2005) EXT(OES_texture_mirrored_repeat , dummy_true , x , x , ES1, x , 2005) -EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample , x , x , ES1, 31, 2014) EXT(OES_texture_npot , ARB_texture_non_power_of_two , x , x , ES1, ES2, 2005) +EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample , x , x , ES1, 31, 2014) EXT(OES_vertex_array_object , dummy_true , x , x , ES1, ES2, 2010) - -EXT(KHR_debug , dummy_true , GLL, GLC, x , x , 2012) -EXT(KHR_context_flush_control , dummy_true , GLL, GLC, x , ES2, 2014) -EXT(KHR_texture_compression_astc_hdr , KHR_texture_compression_astc_hdr , GLL, GLC, x , ES2, 2012) -EXT(KHR_texture_compression_astc_ldr , KHR_texture_compression_astc_ldr , GLL, GLC, x , ES2, 2012) - - -EXT(3DFX_texture_compression_FXT1 , TDFX_texture_compression_FXT1 , GLL, GLC, x , x , 1999) -EXT(AMD_conservative_depth , ARB_conservative_depth , GLL, GLC, x , x , 2009) -EXT(AMD_draw_buffers_blend , ARB_draw_buffers_blend , GLL, GLC, x , x , 2009) -EXT(AMD_performance_monitor , AMD_performance_monitor , GLL, GLC, x , x , 2007) -EXT(AMD_pinned_memory , AMD_pinned_memory , GLL, GLC, x , x , 2013) -EXT(AMD_seamless_cubemap_per_texture , AMD_seamless_cubemap_per_texture , GLL, GLC, x , x , 2009) -EXT(AMD_shader_stencil_export , ARB_shader_stencil_export , GLL, GLC, x , x , 2009) -EXT(AMD_shader_trinary_minmax , dummy_true , GLL, GLC, x , x , 2012) -EXT(AMD_vertex_shader_layer , AMD_vertex_shader_layer , x , GLC, x , x , 2012) -EXT(AMD_vertex_shader_viewport_index , AMD_vertex_shader_viewport_index , x , GLC, x , x , 2012) -EXT(APPLE_object_purgeable , APPLE_object_purgeable , GLL, GLC, x , x , 2006) -EXT(APPLE_packed_pixels , dummy_true , GLL, x , x , x , 2002) -EXT(APPLE_texture_max_level , dummy_true , x , x , ES1, ES2, 2009) -EXT(APPLE_vertex_array_object , dummy_true , GLL, x , x , x , 2002) -EXT(ATI_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003) -EXT(ATI_draw_buffers , dummy_true , GLL, x , x , x , 2002) -EXT(ATI_fragment_shader , ATI_fragment_shader , GLL, x , x , x , 2001) -EXT(ATI_separate_stencil , ATI_separate_stencil , GLL, x , x , x , 2006) -EXT(ATI_texture_compression_3dc , ATI_texture_compression_3dc , GLL, x , x , x , 2004) -EXT(ATI_texture_env_combine3 , ATI_texture_env_combine3 , GLL, x , x , x , 2002) -EXT(ATI_texture_float , ARB_texture_float , GLL, GLC, x , x , 2002) -EXT(ATI_texture_mirror_once , ATI_texture_mirror_once , GLL, GLC, x , x , 2006) -EXT(IBM_multimode_draw_arrays , dummy_true , GLL, GLC, x , x , 1998) -EXT(IBM_rasterpos_clip , dummy_true , GLL, x , x , x , 1996) -EXT(IBM_texture_mirrored_repeat , dummy_true , GLL, x , x , x , 1998) -EXT(INGR_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999) -EXT(INTEL_performance_query , INTEL_performance_query , GLL, GLC, x , ES2, 2013) -EXT(MESA_pack_invert , MESA_pack_invert , GLL, GLC, x , x , 2002) -EXT(MESA_texture_signed_rgba , EXT_texture_snorm , GLL, GLC, x , x , 2009) -EXT(MESA_window_pos , dummy_true , GLL, x , x , x , 2000) -EXT(MESA_ycbcr_texture , MESA_ycbcr_texture , GLL, GLC, x , x , 2002) -EXT(NV_blend_square , dummy_true , GLL, x , x , x , 1999) -EXT(NV_conditional_render , NV_conditional_render , GLL, GLC, x , x , 2008) -EXT(NV_depth_clamp , ARB_depth_clamp , GLL, GLC, x , x , 2001) -EXT(NV_draw_buffers , dummy_true , x , x , x , ES2, 2011) -EXT(NV_fbo_color_attachments , dummy_true , x , x , x , ES2, 2010) -EXT(NV_fog_distance , NV_fog_distance , GLL, x , x , x , 2001) -EXT(NV_fragment_program_option , NV_fragment_program_option , GLL, x , x , x , 2005) -EXT(NV_light_max_exponent , dummy_true , GLL, x , x , x , 1999) -EXT(NV_packed_depth_stencil , dummy_true , GLL, GLC, x , x , 2000) -EXT(NV_point_sprite , NV_point_sprite , GLL, GLC, x , x , 2001) -EXT(NV_primitive_restart , NV_primitive_restart , GLL, x , x , x , 2002) -EXT(NV_read_buffer , dummy_true , x , x , x , ES2, 2011) -EXT(NV_read_depth , dummy_true , x , x , x , ES2, 2011) -EXT(NV_read_depth_stencil , dummy_true , x , x , x , ES2, 2011) -EXT(NV_read_stencil , dummy_true , x , x , x , ES2, 2011) -EXT(NV_texgen_reflection , dummy_true , GLL, x , x , x , 1999) -EXT(NV_texture_barrier , NV_texture_barrier , GLL, GLC, x , x , 2009) -EXT(NV_texture_env_combine4 , NV_texture_env_combine4 , GLL, x , x , x , 1999) -EXT(NV_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2000) -EXT(NV_vdpau_interop , NV_vdpau_interop , GLL, GLC, x , x , 2010) EXT(S3_s3tc , ANGLE_texture_compression_dxt , GLL, GLC, x , x , 1999) + EXT(SGIS_generate_mipmap , dummy_true , GLL, x , x , x , 1997) EXT(SGIS_texture_border_clamp , ARB_texture_border_clamp , GLL, x , x , x , 1997) EXT(SGIS_texture_edge_clamp , dummy_true , GLL, x , x , x , 1997) EXT(SGIS_texture_lod , dummy_true , GLL, x , x , x , 1997) + EXT(SUN_multi_draw_arrays , dummy_true , GLL, x , x , x , 1999) #undef GLL #undef GLC diff --git a/src/mesa/main/fog.c b/src/mesa/main/fog.c index 45f343d61c8..1ad939cfde6 100644 --- a/src/mesa/main/fog.c +++ b/src/mesa/main/fog.c @@ -190,7 +190,7 @@ _mesa_Fogfv( GLenum pname, const GLfloat *params ) } if (ctx->Driver.Fogfv) { - (*ctx->Driver.Fogfv)( ctx, pname, params ); + ctx->Driver.Fogfv( ctx, pname, params ); } return; diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index fbc7b8f8602..9b22b91ac1b 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -330,6 +330,9 @@ descriptor=[ # GL_KHR_context_flush_control [ "CONTEXT_RELEASE_BEHAVIOR", "CONTEXT_ENUM(Const.ContextReleaseBehavior), NO_EXTRA" ], + +# blend_func_extended + [ "MAX_DUAL_SOURCE_DRAW_BUFFERS", "CONTEXT_INT(Const.MaxDualSourceDrawBuffers), extra_ARB_blend_func_extended" ], ]}, # GLES3 is not a typo. @@ -801,7 +804,6 @@ descriptor=[ # GL_ARB_robustness [ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ], - [ "MAX_DUAL_SOURCE_DRAW_BUFFERS", "CONTEXT_INT(Const.MaxDualSourceDrawBuffers), extra_ARB_blend_func_extended" ], # GL_ARB_uniform_buffer_object [ "MAX_GEOMETRY_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), extra_ARB_uniform_buffer_object_and_geometry_shader" ], diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c index 9873fdbf1a4..87c5a3a194f 100644 --- a/src/mesa/main/getstring.c +++ b/src/mesa/main/getstring.c @@ -121,7 +121,7 @@ _mesa_GetString( GLenum name ) assert(ctx->Driver.GetString); { /* Give the driver the chance to handle this query */ - const GLubyte *str = (*ctx->Driver.GetString)(ctx, name); + const GLubyte *str = ctx->Driver.GetString(ctx, name); if (str) return str; } @@ -203,12 +203,18 @@ _mesa_GetPointerv( GLenum pname, GLvoid **params ) { GET_CURRENT_CONTEXT(ctx); const GLuint clientUnit = ctx->Array.ActiveTexture; + const char *callerstr; + + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glGetPointerv"; + else + callerstr = "glGetPointervKHR"; if (!params) return; if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glGetPointerv %s\n", _mesa_enum_to_string(pname)); + _mesa_debug(ctx, "%s %s\n", callerstr, _mesa_enum_to_string(pname)); switch (pname) { case GL_VERTEX_ARRAY_POINTER: @@ -268,10 +274,7 @@ _mesa_GetPointerv( GLenum pname, GLvoid **params ) break; case GL_DEBUG_CALLBACK_FUNCTION_ARB: case GL_DEBUG_CALLBACK_USER_PARAM_ARB: - if (!_mesa_is_desktop_gl(ctx)) - goto invalid_pname; - else - *params = _mesa_get_debug_state_ptr(ctx, pname); + *params = _mesa_get_debug_state_ptr(ctx, pname); break; default: goto invalid_pname; @@ -280,7 +283,7 @@ _mesa_GetPointerv( GLenum pname, GLvoid **params ) return; invalid_pname: - _mesa_error( ctx, GL_INVALID_ENUM, "glGetPointerv" ); + _mesa_error( ctx, GL_INVALID_ENUM, "%s", callerstr); return; } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 95cbba4ed57..4a849fb090d 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2193,6 +2193,7 @@ struct gl_ati_fragment_shader_state struct gl_subroutine_function { char *name; + int index; int num_compat_types; const struct glsl_type **types; }; @@ -3766,6 +3767,7 @@ struct gl_extensions GLboolean EXT_polygon_offset_clamp; GLboolean EXT_provoking_vertex; GLboolean EXT_shader_integer_mix; + GLboolean EXT_shader_samples_identical; GLboolean EXT_stencil_two_side; GLboolean EXT_texture_array; GLboolean EXT_texture_compression_latc; diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c index 1019f893ba8..41f370ce485 100644 --- a/src/mesa/main/objectlabel.c +++ b/src/mesa/main/objectlabel.c @@ -243,13 +243,19 @@ _mesa_ObjectLabel(GLenum identifier, GLuint name, GLsizei length, const GLchar *label) { GET_CURRENT_CONTEXT(ctx); + const char *callerstr; char **labelPtr; - labelPtr = get_label_pointer(ctx, identifier, name, "glObjectLabel"); + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glObjectLabel"; + else + callerstr = "glObjectLabelKHR"; + + labelPtr = get_label_pointer(ctx, identifier, name, callerstr); if (!labelPtr) return; - set_label(ctx, labelPtr, label, length, "glObjectLabel"); + set_label(ctx, labelPtr, label, length, callerstr); } void GLAPIENTRY @@ -257,15 +263,21 @@ _mesa_GetObjectLabel(GLenum identifier, GLuint name, GLsizei bufSize, GLsizei *length, GLchar *label) { GET_CURRENT_CONTEXT(ctx); + const char *callerstr; char **labelPtr; + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glGetObjectLabel"; + else + callerstr = "glGetObjectLabelKHR"; + if (bufSize < 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glGetObjectLabel(bufSize = %d)", + _mesa_error(ctx, GL_INVALID_VALUE, "%s(bufSize = %d)", callerstr, bufSize); return; } - labelPtr = get_label_pointer(ctx, identifier, name, "glGetObjectLabel"); + labelPtr = get_label_pointer(ctx, identifier, name, callerstr); if (!labelPtr) return; @@ -276,17 +288,24 @@ void GLAPIENTRY _mesa_ObjectPtrLabel(const void *ptr, GLsizei length, const GLchar *label) { GET_CURRENT_CONTEXT(ctx); - char **labelPtr; struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr; + const char *callerstr; + char **labelPtr; + + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glObjectPtrLabel"; + else + callerstr = "glObjectPtrLabelKHR"; if (!_mesa_validate_sync(ctx, syncObj)) { - _mesa_error(ctx, GL_INVALID_VALUE, "glObjectPtrLabel (not a valid sync object)"); + _mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)", + callerstr); return; } labelPtr = &syncObj->Label; - set_label(ctx, labelPtr, label, length, "glObjectPtrLabel"); + set_label(ctx, labelPtr, label, length, callerstr); } void GLAPIENTRY @@ -294,17 +313,24 @@ _mesa_GetObjectPtrLabel(const void *ptr, GLsizei bufSize, GLsizei *length, GLchar *label) { GET_CURRENT_CONTEXT(ctx); - char **labelPtr; struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr; + const char *callerstr; + char **labelPtr; + + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glGetObjectPtrLabel"; + else + callerstr = "glGetObjectPtrLabelKHR"; if (bufSize < 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glGetObjectPtrLabel(bufSize = %d)", + _mesa_error(ctx, GL_INVALID_VALUE, "%s(bufSize = %d)", callerstr, bufSize); return; } if (!_mesa_validate_sync(ctx, syncObj)) { - _mesa_error(ctx, GL_INVALID_VALUE, "glGetObjectPtrLabel (not a valid sync object)"); + _mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)", + callerstr); return; } diff --git a/src/mesa/main/points.c b/src/mesa/main/points.c index 863e3c1af32..c2f2b6399cb 100644 --- a/src/mesa/main/points.c +++ b/src/mesa/main/points.c @@ -209,7 +209,7 @@ _mesa_PointParameterfv( GLenum pname, const GLfloat *params) } if (ctx->Driver.PointParameterfv) - (*ctx->Driver.PointParameterfv)(ctx, pname, params); + ctx->Driver.PointParameterfv(ctx, pname, params); } diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 58ba04153e6..79a91b5b6bd 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -661,6 +661,13 @@ _mesa_program_resource_index(struct gl_shader_program *shProg, switch (res->Type) { case GL_ATOMIC_COUNTER_BUFFER: return RESOURCE_ATC(res) - shProg->AtomicBuffers; + case GL_VERTEX_SUBROUTINE: + case GL_GEOMETRY_SUBROUTINE: + case GL_FRAGMENT_SUBROUTINE: + case GL_COMPUTE_SUBROUTINE: + case GL_TESS_CONTROL_SUBROUTINE: + case GL_TESS_EVALUATION_SUBROUTINE: + return RESOURCE_SUB(res)->index; case GL_UNIFORM_BLOCK: case GL_SHADER_STORAGE_BLOCK: case GL_TRANSFORM_FEEDBACK_VARYING: @@ -1413,9 +1420,19 @@ _mesa_validate_pipeline_io(struct gl_pipeline_object *pipeline) for (idx = prev + 1; idx < ARRAY_SIZE(pipeline->CurrentProgram); idx++) { if (shProg[idx]) { - if (!validate_io(shProg[prev]->_LinkedShaders[prev], - shProg[idx]->_LinkedShaders[idx])) - return false; + /* Since we now only validate precision, we can skip this step for + * desktop GLSL shaders, there precision qualifier is ignored. + * + * From OpenGL 4.50 Shading Language spec, section 4.7: + * "For the purposes of determining if an output from one shader + * stage matches an input of the next stage, the precision + * qualifier need not match." + */ + if (shProg[prev]->IsES || shProg[idx]->IsES) { + if (!validate_io(shProg[prev]->_LinkedShaders[prev], + shProg[idx]->_LinkedShaders[idx])) + return false; + } prev = idx; } } diff --git a/src/mesa/main/tests/Makefile.am b/src/mesa/main/tests/Makefile.am index bd7ab7365c0..d6977e20e85 100644 --- a/src/mesa/main/tests/Makefile.am +++ b/src/mesa/main/tests/Makefile.am @@ -27,6 +27,7 @@ AM_CPPFLAGS += -DHAVE_SHARED_GLAPI main_test_SOURCES += \ dispatch_sanity.cpp \ mesa_formats.cpp \ + mesa_extensions.cpp \ program_state_string.cpp main_test_LDADD += \ diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index abe0f432572..97f81f932f6 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -2421,6 +2421,11 @@ const struct function gles3_functions_possible[] = { { "glProgramUniform4uiEXT", 30, -1 }, { "glProgramUniform4uivEXT", 30, -1 }, + /* GL_EXT_blend_func_extended */ + { "glBindFragDataLocationIndexedEXT", 30, -1 }, + { "glGetFragDataIndexEXT", 30, -1 }, + { "glBindFragDataLocationEXT", 30, -1 }, + { NULL, 0, -1 } }; @@ -2509,5 +2514,8 @@ const struct function gles31_functions_possible[] = { /* GL_EXT_buffer_storage */ { "glBufferStorageEXT", 31, -1 }, + /* GL_EXT_blend_func_extended */ + { "glGetProgramResourceLocationIndexEXT", 31, -1 }, + { NULL, 0, -1 }, }; diff --git a/src/mesa/main/tests/mesa_extensions.cpp b/src/mesa/main/tests/mesa_extensions.cpp new file mode 100644 index 00000000000..0c7addd4282 --- /dev/null +++ b/src/mesa/main/tests/mesa_extensions.cpp @@ -0,0 +1,51 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \name mesa_extensions.cpp + * + * Verify that the extensions table is sorted. + */ + +#include <gtest/gtest.h> +#include "util/macros.h" + +/** + * Debug/test: verify the extension table is alphabetically sorted. + */ +TEST(MesaExtensionsTest, AlphabeticallySorted) +{ + const char *ext_names[] = { + #define EXT(name_str, ...) #name_str, + #include "main/extensions_table.h" + #undef EXT + }; + + for (unsigned i = 0; i < ARRAY_SIZE(ext_names) - 1; ++i) { + const char *current_str = ext_names[i]; + const char *next_str = ext_names[i+1]; + + /* We expect the extension table to be alphabetically sorted */ + ASSERT_LT(strcmp(current_str, next_str), 0); + } +} diff --git a/src/mesa/main/texenv.c b/src/mesa/main/texenv.c index 091922161c5..93c680650bb 100644 --- a/src/mesa/main/texenv.c +++ b/src/mesa/main/texenv.c @@ -495,7 +495,7 @@ _mesa_TexEnvfv( GLenum target, GLenum pname, const GLfloat *param ) /* Tell device driver about the new texture environment */ if (ctx->Driver.TexEnv) { - (*ctx->Driver.TexEnv)( ctx, target, pname, param ); + ctx->Driver.TexEnv(ctx, target, pname, param); } } diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index d9453e3a281..ac7599f9fd4 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1333,21 +1333,6 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target, break; case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY: case GL_TEXTURE_CUBE_MAP_ARRAY: - /* From section 3.8.6, page 146 of OpenGL ES 3.0 spec: - * - * "The ETC2/EAC texture compression algorithm supports only - * two-dimensional images. If internalformat is an ETC2/EAC format, - * glCompressedTexImage3D will generate an INVALID_OPERATION error if - * target is not TEXTURE_2D_ARRAY." - * - * This should also be applicable for glTexStorage3D(). Other available - * targets for these functions are: TEXTURE_3D and TEXTURE_CUBE_MAP_ARRAY. - */ - if (layout == MESA_FORMAT_LAYOUT_ETC2 && _mesa_is_gles3(ctx)) - return write_error(error, GL_INVALID_OPERATION); - - target_can_be_compresed = ctx->Extensions.ARB_texture_cube_map_array; - /* From the KHR_texture_compression_astc_hdr spec: * * Add a second new column "3D Tex." which is empty for all non-ASTC @@ -1368,16 +1353,24 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target, * 8.19 is *not* checked' * * The instances of <internalformat> above should say <target>. + * + * ETC2/EAC formats are the only alternative in GLES and thus such errors + * have already been handled by normal ETC2/EAC behavior. */ - /* Throw an INVALID_OPERATION error if the target is - * TEXTURE_CUBE_MAP_ARRAY and the format is not ASTC. + /* From section 3.8.6, page 146 of OpenGL ES 3.0 spec: + * + * "The ETC2/EAC texture compression algorithm supports only + * two-dimensional images. If internalformat is an ETC2/EAC format, + * glCompressedTexImage3D will generate an INVALID_OPERATION error if + * target is not TEXTURE_2D_ARRAY." + * + * This should also be applicable for glTexStorage3D(). Other available + * targets for these functions are: TEXTURE_3D and TEXTURE_CUBE_MAP_ARRAY. */ - if (target_can_be_compresed && - ctx->Extensions.KHR_texture_compression_astc_ldr && - layout != MESA_FORMAT_LAYOUT_ASTC) - return write_error(error, GL_INVALID_OPERATION); - + if (layout == MESA_FORMAT_LAYOUT_ETC2 && _mesa_is_gles3(ctx)) + return write_error(error, GL_INVALID_OPERATION); + target_can_be_compresed = ctx->Extensions.ARB_texture_cube_map_array; break; case GL_TEXTURE_3D: switch (layout) { @@ -1401,12 +1394,6 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target, return write_error(error, GL_INVALID_OPERATION); break; default: - /* Throw an INVALID_OPERATION error if the target is TEXTURE_3D and - * the format is not ASTC. - * See comment in switch case GL_TEXTURE_CUBE_MAP_ARRAY for more info. - */ - if (ctx->Extensions.KHR_texture_compression_astc_ldr) - return write_error(error, GL_INVALID_OPERATION); break; } default: diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 1099d79d834..c5d8c483429 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1920,6 +1920,8 @@ ir_to_mesa_visitor::visit(ir_texture *ir) case ir_query_levels: assert(!"Unexpected ir_query_levels opcode"); break; + case ir_samples_identical: + unreachable("Unexpected ir_samples_identical opcode"); case ir_texture_samples: unreachable("Unexpected ir_texture_samples opcode"); } diff --git a/src/mesa/state_tracker/st_cb_perfmon.c b/src/mesa/state_tracker/st_cb_perfmon.c index 1bb5be397ae..8fdf0e8497f 100644 --- a/src/mesa/state_tracker/st_cb_perfmon.c +++ b/src/mesa/state_tracker/st_cb_perfmon.c @@ -36,69 +36,24 @@ #include "pipe/p_screen.h" #include "util/u_memory.h" -/** - * Return a PIPE_QUERY_x type >= PIPE_QUERY_DRIVER_SPECIFIC, or -1 if - * the driver-specific query doesn't exist. - */ -static int -find_query_type(struct pipe_screen *screen, const char *name) -{ - int num_queries; - int type = -1; - int i; - - num_queries = screen->get_driver_query_info(screen, 0, NULL); - if (!num_queries) - return type; - - for (i = 0; i < num_queries; i++) { - struct pipe_driver_query_info info; - - if (!screen->get_driver_query_info(screen, i, &info)) - continue; - - if (!strncmp(info.name, name, strlen(name))) { - type = info.query_type; - break; - } - } - return type; -} - -/** - * Return TRUE if the underlying driver expose GPU counters. - */ -static bool -has_gpu_counters(struct pipe_screen *screen) -{ - int num_groups, gid; - - num_groups = screen->get_driver_query_group_info(screen, 0, NULL); - for (gid = 0; gid < num_groups; gid++) { - struct pipe_driver_query_group_info group_info; - - if (!screen->get_driver_query_group_info(screen, gid, &group_info)) - continue; - - if (group_info.type == PIPE_DRIVER_QUERY_GROUP_TYPE_GPU) - return true; - } - return false; -} - static bool init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m) { + struct st_context *st = st_context(ctx); struct st_perf_monitor_object *stm = st_perf_monitor_object(m); - struct pipe_screen *screen = st_context(ctx)->pipe->screen; - struct pipe_context *pipe = st_context(ctx)->pipe; + struct pipe_context *pipe = st->pipe; + unsigned *batch = NULL; + unsigned num_active_counters = 0; + unsigned max_batch_counters = 0; + unsigned num_batch_counters = 0; int gid, cid; - st_flush_bitmap_cache(st_context(ctx)); + st_flush_bitmap_cache(st); - /* Create a query for each active counter. */ + /* Determine the number of active counters. */ for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) { const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid]; + const struct st_perf_monitor_group *stg = &st->perfmon[gid]; if (m->ActiveGroups[gid] > g->MaxActiveCounters) { /* Maximum number of counters reached. Cannot start the session. */ @@ -109,53 +64,96 @@ init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m) return false; } - for (cid = 0; cid < g->NumCounters; cid++) { - const struct gl_perf_monitor_counter *c = &g->Counters[cid]; - struct st_perf_counter_object *cntr; - int query_type; + num_active_counters += m->ActiveGroups[gid]; + if (stg->has_batch) + max_batch_counters += m->ActiveGroups[gid]; + } - if (!BITSET_TEST(m->ActiveCounters[gid], cid)) - continue; + if (!num_active_counters) + return true; + + stm->active_counters = CALLOC(num_active_counters, + sizeof(*stm->active_counters)); + if (!stm->active_counters) + return false; - query_type = find_query_type(screen, c->Name); - assert(query_type != -1); + if (max_batch_counters) { + batch = CALLOC(max_batch_counters, sizeof(*batch)); + if (!batch) + return false; + } + + /* Create a query for each active counter. */ + for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) { + const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid]; + const struct st_perf_monitor_group *stg = &st->perfmon[gid]; + BITSET_WORD tmp; - cntr = CALLOC_STRUCT(st_perf_counter_object); - if (!cntr) - return false; + BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) { + const struct st_perf_monitor_counter *stc = &stg->counters[cid]; + struct st_perf_counter_object *cntr = + &stm->active_counters[stm->num_active_counters]; - cntr->query = pipe->create_query(pipe, query_type, 0); cntr->id = cid; cntr->group_id = gid; - - list_addtail(&cntr->list, &stm->active_counters); + if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) { + cntr->batch_index = num_batch_counters; + batch[num_batch_counters++] = stc->query_type; + } else { + cntr->query = pipe->create_query(pipe, stc->query_type, 0); + if (!cntr->query) + goto fail; + } + ++stm->num_active_counters; } } + + /* Create the batch query. */ + if (num_batch_counters) { + stm->batch_query = pipe->create_batch_query(pipe, num_batch_counters, + batch); + stm->batch_result = CALLOC(num_batch_counters, sizeof(stm->batch_result->batch[0])); + if (!stm->batch_query || !stm->batch_result) + goto fail; + } + + FREE(batch); return true; + +fail: + FREE(batch); + return false; } static void reset_perf_monitor(struct st_perf_monitor_object *stm, struct pipe_context *pipe) { - struct st_perf_counter_object *cntr, *tmp; + unsigned i; - LIST_FOR_EACH_ENTRY_SAFE(cntr, tmp, &stm->active_counters, list) { - if (cntr->query) - pipe->destroy_query(pipe, cntr->query); - list_del(&cntr->list); - free(cntr); + for (i = 0; i < stm->num_active_counters; ++i) { + struct pipe_query *query = stm->active_counters[i].query; + if (query) + pipe->destroy_query(pipe, query); } + FREE(stm->active_counters); + stm->active_counters = NULL; + stm->num_active_counters = 0; + + if (stm->batch_query) { + pipe->destroy_query(pipe, stm->batch_query); + stm->batch_query = NULL; + } + FREE(stm->batch_result); + stm->batch_result = NULL; } static struct gl_perf_monitor_object * st_NewPerfMonitor(struct gl_context *ctx) { struct st_perf_monitor_object *stq = ST_CALLOC_STRUCT(st_perf_monitor_object); - if (stq) { - list_inithead(&stq->active_counters); + if (stq) return &stq->base; - } return NULL; } @@ -174,9 +172,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m) { struct st_perf_monitor_object *stm = st_perf_monitor_object(m); struct pipe_context *pipe = st_context(ctx)->pipe; - struct st_perf_counter_object *cntr; + unsigned i; - if (LIST_IS_EMPTY(&stm->active_counters)) { + if (!stm->num_active_counters) { /* Create a query for each active counter before starting * a new monitoring session. */ if (!init_perf_monitor(ctx, m)) @@ -184,10 +182,15 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m) } /* Start the query for each active counter. */ - LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) { - if (!pipe->begin_query(pipe, cntr->query)) + for (i = 0; i < stm->num_active_counters; ++i) { + struct pipe_query *query = stm->active_counters[i].query; + if (query && !pipe->begin_query(pipe, query)) goto fail; } + + if (stm->batch_query && !pipe->begin_query(pipe, stm->batch_query)) + goto fail; + return true; fail: @@ -201,11 +204,17 @@ st_EndPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m) { struct st_perf_monitor_object *stm = st_perf_monitor_object(m); struct pipe_context *pipe = st_context(ctx)->pipe; - struct st_perf_counter_object *cntr; + unsigned i; /* Stop the query for each active counter. */ - LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) - pipe->end_query(pipe, cntr->query); + for (i = 0; i < stm->num_active_counters; ++i) { + struct pipe_query *query = stm->active_counters[i].query; + if (query) + pipe->end_query(pipe, query); + } + + if (stm->batch_query) + pipe->end_query(pipe, stm->batch_query); } static void @@ -229,20 +238,26 @@ st_IsPerfMonitorResultAvailable(struct gl_context *ctx, { struct st_perf_monitor_object *stm = st_perf_monitor_object(m); struct pipe_context *pipe = st_context(ctx)->pipe; - struct st_perf_counter_object *cntr; + unsigned i; - if (LIST_IS_EMPTY(&stm->active_counters)) + if (!stm->num_active_counters) return false; /* The result of a monitoring session is only available if the query of * each active counter is idle. */ - LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) { + for (i = 0; i < stm->num_active_counters; ++i) { + struct pipe_query *query = stm->active_counters[i].query; union pipe_query_result result; - if (!pipe->get_query_result(pipe, cntr->query, FALSE, &result)) { + if (query && !pipe->get_query_result(pipe, query, FALSE, &result)) { /* The query is busy. */ return false; } } + + if (stm->batch_query && + !pipe->get_query_result(pipe, stm->batch_query, FALSE, stm->batch_result)) + return false; + return true; } @@ -255,7 +270,7 @@ st_GetPerfMonitorResult(struct gl_context *ctx, { struct st_perf_monitor_object *stm = st_perf_monitor_object(m); struct pipe_context *pipe = st_context(ctx)->pipe; - struct st_perf_counter_object *cntr; + unsigned i; /* Copy data to the supplied array (data). * @@ -263,9 +278,15 @@ st_GetPerfMonitorResult(struct gl_context *ctx, * active counter. The API allows counters to appear in any order. */ GLsizei offset = 0; + bool have_batch_query = false; + + if (stm->batch_query) + have_batch_query = pipe->get_query_result(pipe, stm->batch_query, TRUE, + stm->batch_result); /* Read query results for each active counter. */ - LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) { + for (i = 0; i < stm->num_active_counters; ++i) { + struct st_perf_counter_object *cntr = &stm->active_counters[i]; union pipe_query_result result = { 0 }; int gid, cid; GLenum type; @@ -274,8 +295,14 @@ st_GetPerfMonitorResult(struct gl_context *ctx, gid = cntr->group_id; type = ctx->PerfMonitor.Groups[gid].Counters[cid].Type; - if (!pipe->get_query_result(pipe, cntr->query, TRUE, &result)) - continue; + if (cntr->query) { + if (!pipe->get_query_result(pipe, cntr->query, TRUE, &result)) + continue; + } else { + if (!have_batch_query) + continue; + result.batch[0] = stm->batch_result->batch[cntr->batch_index]; + } data[offset++] = gid; data[offset++] = cid; @@ -307,18 +334,13 @@ st_init_perfmon(struct st_context *st) struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor; struct pipe_screen *screen = st->pipe->screen; struct gl_perf_monitor_group *groups = NULL; + struct st_perf_monitor_group *stgroups = NULL; int num_counters, num_groups; int gid, cid; if (!screen->get_driver_query_info || !screen->get_driver_query_group_info) return false; - if (!has_gpu_counters(screen)) { - /* According to the spec, GL_AMD_performance_monitor must only - * expose GPU counters. */ - return false; - } - /* Get the number of available queries. */ num_counters = screen->get_driver_query_info(screen, 0, NULL); if (!num_counters) @@ -331,29 +353,37 @@ st_init_perfmon(struct st_context *st) if (!groups) return false; + stgroups = CALLOC(num_groups, sizeof(*stgroups)); + if (!stgroups) + goto fail_only_groups; + for (gid = 0; gid < num_groups; gid++) { struct gl_perf_monitor_group *g = &groups[perfmon->NumGroups]; + struct st_perf_monitor_group *stg = &stgroups[perfmon->NumGroups]; struct pipe_driver_query_group_info group_info; struct gl_perf_monitor_counter *counters = NULL; + struct st_perf_monitor_counter *stcounters = NULL; if (!screen->get_driver_query_group_info(screen, gid, &group_info)) continue; - if (group_info.type != PIPE_DRIVER_QUERY_GROUP_TYPE_GPU) - continue; - g->Name = group_info.name; g->MaxActiveCounters = group_info.max_active_queries; - g->NumCounters = 0; - g->Counters = NULL; if (group_info.num_queries) counters = CALLOC(group_info.num_queries, sizeof(*counters)); if (!counters) goto fail; + g->Counters = counters; + + stcounters = CALLOC(group_info.num_queries, sizeof(*stcounters)); + if (!stcounters) + goto fail; + stg->counters = stcounters; for (cid = 0; cid < num_counters; cid++) { struct gl_perf_monitor_counter *c = &counters[g->NumCounters]; + struct st_perf_monitor_counter *stc = &stcounters[g->NumCounters]; struct pipe_driver_query_info info; if (!screen->get_driver_query_info(screen, cid, &info)) @@ -364,6 +394,9 @@ st_init_perfmon(struct st_context *st) c->Name = info.name; switch (info.type) { case PIPE_DRIVER_QUERY_TYPE_UINT64: + case PIPE_DRIVER_QUERY_TYPE_BYTES: + case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS: + case PIPE_DRIVER_QUERY_TYPE_HZ: c->Minimum.u64 = 0; c->Maximum.u64 = info.max_value.u64 ? info.max_value.u64 : -1; c->Type = GL_UNSIGNED_INT64_AMD; @@ -386,18 +419,28 @@ st_init_perfmon(struct st_context *st) default: unreachable("Invalid driver query type!"); } + + stc->query_type = info.query_type; + stc->flags = info.flags; + if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) + stg->has_batch = true; + g->NumCounters++; } - g->Counters = counters; perfmon->NumGroups++; } perfmon->Groups = groups; + st->perfmon = stgroups; return true; fail: - for (gid = 0; gid < num_groups; gid++) + for (gid = 0; gid < num_groups; gid++) { + FREE(stgroups[gid].counters); FREE((void *)groups[gid].Counters); + } + FREE(stgroups); +fail_only_groups: FREE(groups); return false; } @@ -408,8 +451,11 @@ st_destroy_perfmon(struct st_context *st) struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor; int gid; - for (gid = 0; gid < perfmon->NumGroups; gid++) + for (gid = 0; gid < perfmon->NumGroups; gid++) { + FREE(st->perfmon[gid].counters); FREE((void *)perfmon->Groups[gid].Counters); + } + FREE(st->perfmon); FREE((void *)perfmon->Groups); } diff --git a/src/mesa/state_tracker/st_cb_perfmon.h b/src/mesa/state_tracker/st_cb_perfmon.h index 0b195de47fe..29732866bf8 100644 --- a/src/mesa/state_tracker/st_cb_perfmon.h +++ b/src/mesa/state_tracker/st_cb_perfmon.h @@ -26,21 +26,41 @@ #include "util/list.h" +struct st_perf_counter_object +{ + struct pipe_query *query; + int id; + int group_id; + unsigned batch_index; +}; + /** * Subclass of gl_perf_monitor_object */ struct st_perf_monitor_object { struct gl_perf_monitor_object base; - struct list_head active_counters; + unsigned num_active_counters; + struct st_perf_counter_object *active_counters; + + struct pipe_query *batch_query; + union pipe_query_result *batch_result; }; -struct st_perf_counter_object +/** + * Extra data per counter, supplementing gl_perf_monitor_counter with + * driver-specific information. + */ +struct st_perf_monitor_counter { - struct list_head list; - struct pipe_query *query; - int id; - int group_id; + unsigned query_type; + unsigned flags; +}; + +struct st_perf_monitor_group +{ + struct st_perf_monitor_counter *counters; + bool has_batch; }; /** diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index c243f5cd966..60a9a4bb0d5 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -46,6 +46,7 @@ struct draw_stage; struct gen_mipmap_state; struct st_context; struct st_fragment_program; +struct st_perf_monitor_group; struct u_upload_mgr; @@ -217,6 +218,8 @@ struct st_context int32_t read_stamp; struct st_config_options options; + + struct st_perf_monitor_group *perfmon; }; diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 99e96e1f3ae..a2418e28a91 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -343,7 +343,7 @@ struct st_extension_cap_mapping { struct st_extension_format_mapping { int extension_offset[2]; - enum pipe_format format[8]; + enum pipe_format format[32]; /* If TRUE, at least one format must be supported for the extensions to be * advertised. If FALSE, all the formats must be supported. */ @@ -569,6 +569,36 @@ void st_init_extensions(struct pipe_screen *screen, PIPE_FORMAT_BPTC_RGB_FLOAT, PIPE_FORMAT_BPTC_RGB_UFLOAT } }, + { { o(KHR_texture_compression_astc_ldr) }, + { PIPE_FORMAT_ASTC_4x4, + PIPE_FORMAT_ASTC_5x4, + PIPE_FORMAT_ASTC_5x5, + PIPE_FORMAT_ASTC_6x5, + PIPE_FORMAT_ASTC_6x6, + PIPE_FORMAT_ASTC_8x5, + PIPE_FORMAT_ASTC_8x6, + PIPE_FORMAT_ASTC_8x8, + PIPE_FORMAT_ASTC_10x5, + PIPE_FORMAT_ASTC_10x6, + PIPE_FORMAT_ASTC_10x8, + PIPE_FORMAT_ASTC_10x10, + PIPE_FORMAT_ASTC_12x10, + PIPE_FORMAT_ASTC_12x12, + PIPE_FORMAT_ASTC_4x4_SRGB, + PIPE_FORMAT_ASTC_5x4_SRGB, + PIPE_FORMAT_ASTC_5x5_SRGB, + PIPE_FORMAT_ASTC_6x5_SRGB, + PIPE_FORMAT_ASTC_6x6_SRGB, + PIPE_FORMAT_ASTC_8x5_SRGB, + PIPE_FORMAT_ASTC_8x6_SRGB, + PIPE_FORMAT_ASTC_8x8_SRGB, + PIPE_FORMAT_ASTC_10x5_SRGB, + PIPE_FORMAT_ASTC_10x6_SRGB, + PIPE_FORMAT_ASTC_10x8_SRGB, + PIPE_FORMAT_ASTC_10x10_SRGB, + PIPE_FORMAT_ASTC_12x10_SRGB, + PIPE_FORMAT_ASTC_12x12_SRGB } }, + { { o(EXT_texture_shared_exponent) }, { PIPE_FORMAT_R9G9B9E5_FLOAT } }, diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 144b7d6f659..2b92bade440 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -465,6 +465,64 @@ st_mesa_format_to_pipe_format(struct st_context *st, mesa_format mesaFormat) case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1: return st->has_etc2 ? PIPE_FORMAT_ETC2_SRGB8A1 : PIPE_FORMAT_B8G8R8A8_SRGB; + case MESA_FORMAT_RGBA_ASTC_4x4: + return PIPE_FORMAT_ASTC_4x4; + case MESA_FORMAT_RGBA_ASTC_5x4: + return PIPE_FORMAT_ASTC_5x4; + case MESA_FORMAT_RGBA_ASTC_5x5: + return PIPE_FORMAT_ASTC_5x5; + case MESA_FORMAT_RGBA_ASTC_6x5: + return PIPE_FORMAT_ASTC_6x5; + case MESA_FORMAT_RGBA_ASTC_6x6: + return PIPE_FORMAT_ASTC_6x6; + case MESA_FORMAT_RGBA_ASTC_8x5: + return PIPE_FORMAT_ASTC_8x5; + case MESA_FORMAT_RGBA_ASTC_8x6: + return PIPE_FORMAT_ASTC_8x6; + case MESA_FORMAT_RGBA_ASTC_8x8: + return PIPE_FORMAT_ASTC_8x8; + case MESA_FORMAT_RGBA_ASTC_10x5: + return PIPE_FORMAT_ASTC_10x5; + case MESA_FORMAT_RGBA_ASTC_10x6: + return PIPE_FORMAT_ASTC_10x6; + case MESA_FORMAT_RGBA_ASTC_10x8: + return PIPE_FORMAT_ASTC_10x8; + case MESA_FORMAT_RGBA_ASTC_10x10: + return PIPE_FORMAT_ASTC_10x10; + case MESA_FORMAT_RGBA_ASTC_12x10: + return PIPE_FORMAT_ASTC_12x10; + case MESA_FORMAT_RGBA_ASTC_12x12: + return PIPE_FORMAT_ASTC_12x12; + + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4: + return PIPE_FORMAT_ASTC_4x4_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4: + return PIPE_FORMAT_ASTC_5x4_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5: + return PIPE_FORMAT_ASTC_5x5_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5: + return PIPE_FORMAT_ASTC_6x5_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6: + return PIPE_FORMAT_ASTC_6x6_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5: + return PIPE_FORMAT_ASTC_8x5_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6: + return PIPE_FORMAT_ASTC_8x6_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8: + return PIPE_FORMAT_ASTC_8x8_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5: + return PIPE_FORMAT_ASTC_10x5_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6: + return PIPE_FORMAT_ASTC_10x6_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8: + return PIPE_FORMAT_ASTC_10x8_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10: + return PIPE_FORMAT_ASTC_10x10_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10: + return PIPE_FORMAT_ASTC_12x10_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12: + return PIPE_FORMAT_ASTC_12x12_SRGB; + default: return PIPE_FORMAT_NONE; } @@ -883,6 +941,64 @@ st_pipe_format_to_mesa_format(enum pipe_format format) case PIPE_FORMAT_ETC2_RG11_SNORM: return MESA_FORMAT_ETC2_SIGNED_RG11_EAC; + case PIPE_FORMAT_ASTC_4x4: + return MESA_FORMAT_RGBA_ASTC_4x4; + case PIPE_FORMAT_ASTC_5x4: + return MESA_FORMAT_RGBA_ASTC_5x4; + case PIPE_FORMAT_ASTC_5x5: + return MESA_FORMAT_RGBA_ASTC_5x5; + case PIPE_FORMAT_ASTC_6x5: + return MESA_FORMAT_RGBA_ASTC_6x5; + case PIPE_FORMAT_ASTC_6x6: + return MESA_FORMAT_RGBA_ASTC_6x6; + case PIPE_FORMAT_ASTC_8x5: + return MESA_FORMAT_RGBA_ASTC_8x5; + case PIPE_FORMAT_ASTC_8x6: + return MESA_FORMAT_RGBA_ASTC_8x6; + case PIPE_FORMAT_ASTC_8x8: + return MESA_FORMAT_RGBA_ASTC_8x8; + case PIPE_FORMAT_ASTC_10x5: + return MESA_FORMAT_RGBA_ASTC_10x5; + case PIPE_FORMAT_ASTC_10x6: + return MESA_FORMAT_RGBA_ASTC_10x6; + case PIPE_FORMAT_ASTC_10x8: + return MESA_FORMAT_RGBA_ASTC_10x8; + case PIPE_FORMAT_ASTC_10x10: + return MESA_FORMAT_RGBA_ASTC_10x10; + case PIPE_FORMAT_ASTC_12x10: + return MESA_FORMAT_RGBA_ASTC_12x10; + case PIPE_FORMAT_ASTC_12x12: + return MESA_FORMAT_RGBA_ASTC_12x12; + + case PIPE_FORMAT_ASTC_4x4_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4; + case PIPE_FORMAT_ASTC_5x4_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4; + case PIPE_FORMAT_ASTC_5x5_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5; + case PIPE_FORMAT_ASTC_6x5_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5; + case PIPE_FORMAT_ASTC_6x6_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6; + case PIPE_FORMAT_ASTC_8x5_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5; + case PIPE_FORMAT_ASTC_8x6_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6; + case PIPE_FORMAT_ASTC_8x8_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8; + case PIPE_FORMAT_ASTC_10x5_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5; + case PIPE_FORMAT_ASTC_10x6_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6; + case PIPE_FORMAT_ASTC_10x8_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8; + case PIPE_FORMAT_ASTC_10x10_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10; + case PIPE_FORMAT_ASTC_12x10_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10; + case PIPE_FORMAT_ASTC_12x12_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12; + default: return MESA_FORMAT_NONE; } @@ -1386,6 +1502,121 @@ static const struct format_mapping format_map[] = { { PIPE_FORMAT_BPTC_RGB_UFLOAT, 0 }, }, + /* ASTC */ + { + { GL_COMPRESSED_RGBA_ASTC_4x4_KHR, 0 }, + { PIPE_FORMAT_ASTC_4x4, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_5x4_KHR, 0 }, + { PIPE_FORMAT_ASTC_5x4, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_5x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_5x5, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_6x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_6x5, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_6x6_KHR, 0 }, + { PIPE_FORMAT_ASTC_6x6, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_8x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_8x5, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_8x6_KHR, 0 }, + { PIPE_FORMAT_ASTC_8x6, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_8x8_KHR, 0 }, + { PIPE_FORMAT_ASTC_8x8, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_10x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x5, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_10x6_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x6, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_10x8_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x8, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_10x10_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x10, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_12x10_KHR, 0 }, + { PIPE_FORMAT_ASTC_12x10, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_12x12_KHR, 0 }, + { PIPE_FORMAT_ASTC_12x12, 0}, + }, + + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, 0 }, + { PIPE_FORMAT_ASTC_4x4_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR, 0 }, + { PIPE_FORMAT_ASTC_5x4_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_5x5_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_6x5_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, 0 }, + { PIPE_FORMAT_ASTC_6x6_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_8x5_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, 0 }, + { PIPE_FORMAT_ASTC_8x6_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR, 0 }, + { PIPE_FORMAT_ASTC_8x8_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x5_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x6_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x8_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x10_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, 0 }, + { PIPE_FORMAT_ASTC_12x10_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, 0 }, + { PIPE_FORMAT_ASTC_12x12_SRGB, 0}, + }, + /* signed/unsigned integer formats. */ { diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3ad1afdecda..40c77258de7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3236,6 +3236,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) case ir_texture_samples: opcode = TGSI_OPCODE_TXQS; break; + case ir_samples_identical: + unreachable("Unexpected ir_samples_identical opcode"); } if (ir->projector) { |