From 511ce2925baf90c1d93d3e6a389d31e8e7549493 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 23 Mar 2016 22:35:41 -0700 Subject: mesa: Check glReadBuffer enums against the ES3 table. From the ES 3.2 spec, section 16.1.1 (Selecting Buffers for Reading): "An INVALID_ENUM error is generated if src is not BACK or one of the values from table 15.5." Table 15.5 contains NONE and COLOR_ATTACHMENTi. Mesa properly returned INVALID_ENUM for unknown enums, but it decided what was known by using read_buffer_enum_to_index, which handles all enums in every API. So enums that were valid in GL were making it past the "valid enum" check. Such targets would then be classified as unsupported, and we'd raise INVALID_OPERATION, but that's technically the wrong error code. Fixes dEQP-GLES31's functional.debug.negative_coverage.get_error.buffer.read_buffer v2: Only call read_buffer_enum_to_index when required (Eduardo). Signed-off-by: Kenneth Graunke Reviewed-by: Eduardo Lima Mitev --- src/mesa/main/buffers.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 26dafd1b786..a28c5831576 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -222,6 +222,12 @@ read_buffer_enum_to_index(GLenum buffer) } } +static bool +is_legal_es3_readbuffer_enum(GLenum buf) +{ + return buf == GL_BACK || buf == GL_NONE || + (buf >= GL_COLOR_ATTACHMENT0 && buf <= GL_COLOR_ATTACHMENT31); +} /** * Called by glDrawBuffer() and glNamedFramebufferDrawBuffer(). 
@@ -715,7 +721,11 @@ read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, } else { /* general case / window-system framebuffer */ - srcBuffer = read_buffer_enum_to_index(buffer); + if (_mesa_is_gles3(ctx) && !is_legal_es3_readbuffer_enum(buffer)) + srcBuffer = -1; + else + srcBuffer = read_buffer_enum_to_index(buffer); + if (srcBuffer == -1) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", caller, -- cgit v1.2.3 From 8683d54d2be82519c31e087e17dd936d13fa9d07 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Thu, 24 Mar 2016 12:11:01 +1100 Subject: glsl: reduce buffer block duplication This reduces some of the craziness required for handling buffer blocks. The problem is each shader stage holds its own information about a block in memory, we were copying that information to a program wide list but the per stage information remained meaning when a binding was updated we needed to update all versions of it. This changes the per stage blocks to instead point to a single version of the block information in the program list. 
Acked-by: Kenneth Graunke --- src/compiler/glsl/link_uniform_initializers.cpp | 2 +- src/compiler/glsl/link_uniforms.cpp | 12 ++-- src/compiler/glsl/linker.cpp | 78 +++++++++++++++---------- src/compiler/glsl/standalone_scaffolding.cpp | 5 -- src/mesa/main/mtypes.h | 9 +-- src/mesa/main/uniforms.c | 33 +---------- 6 files changed, 57 insertions(+), 82 deletions(-) (limited to 'src/mesa') diff --git a/src/compiler/glsl/link_uniform_initializers.cpp b/src/compiler/glsl/link_uniform_initializers.cpp index 3609f81771e..7d280ccf7fc 100644 --- a/src/compiler/glsl/link_uniform_initializers.cpp +++ b/src/compiler/glsl/link_uniform_initializers.cpp @@ -183,7 +183,7 @@ set_block_binding(gl_shader_program *prog, const char *block_name, int binding) if (stage_index != -1) { struct gl_shader *sh = prog->_LinkedShaders[i]; - sh->BufferInterfaceBlocks[stage_index].Binding = binding; + sh->BufferInterfaceBlocks[stage_index]->Binding = binding; } } } diff --git a/src/compiler/glsl/link_uniforms.cpp b/src/compiler/glsl/link_uniforms.cpp index 940cc61181d..807b069e3ed 100644 --- a/src/compiler/glsl/link_uniforms.cpp +++ b/src/compiler/glsl/link_uniforms.cpp @@ -954,6 +954,8 @@ link_cross_validate_uniform_block(void *mem_ctx, new_block->Uniforms, sizeof(*linked_block->Uniforms) * linked_block->NumUniforms); + linked_block->Name = ralloc_strdup(*linked_blocks, linked_block->Name); + for (unsigned int i = 0; i < linked_block->NumUniforms; i++) { struct gl_uniform_buffer_variable *ubo_var = &linked_block->Uniforms[i]; @@ -1005,9 +1007,9 @@ link_update_uniform_buffer_variables(struct gl_shader *shader) const unsigned l = strlen(var->name); for (unsigned i = 0; i < shader->NumBufferInterfaceBlocks; i++) { - for (unsigned j = 0; j < shader->BufferInterfaceBlocks[i].NumUniforms; j++) { + for (unsigned j = 0; j < shader->BufferInterfaceBlocks[i]->NumUniforms; j++) { if (sentinel) { - const char *begin = shader->BufferInterfaceBlocks[i].Uniforms[j].Name; + const char *begin = 
shader->BufferInterfaceBlocks[i]->Uniforms[j].Name; const char *end = strchr(begin, sentinel); if (end == NULL) @@ -1022,7 +1024,7 @@ link_update_uniform_buffer_variables(struct gl_shader *shader) break; } } else if (!strcmp(var->name, - shader->BufferInterfaceBlocks[i].Uniforms[j].Name)) { + shader->BufferInterfaceBlocks[i]->Uniforms[j].Name)) { found = true; var->data.location = j; break; @@ -1148,9 +1150,9 @@ link_assign_uniform_locations(struct gl_shader_program *prog, sh->num_combined_uniform_components = sh->num_uniform_components; for (unsigned i = 0; i < sh->NumBufferInterfaceBlocks; i++) { - if (!sh->BufferInterfaceBlocks[i].IsShaderStorage) { + if (!sh->BufferInterfaceBlocks[i]->IsShaderStorage) { sh->num_combined_uniform_components += - sh->BufferInterfaceBlocks[i].UniformBufferSize / 4; + sh->BufferInterfaceBlocks[i]->UniformBufferSize / 4; } } } diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index 76b700d3451..cd35464eeeb 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -1192,11 +1192,11 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog) int index = link_cross_validate_uniform_block(prog, &prog->BufferInterfaceBlocks, &prog->NumBufferInterfaceBlocks, - &sh->BufferInterfaceBlocks[j]); + sh->BufferInterfaceBlocks[j]); if (index == -1) { linker_error(prog, "uniform block `%s' has mismatching definitions\n", - sh->BufferInterfaceBlocks[j].Name); + sh->BufferInterfaceBlocks[j]->Name); return false; } @@ -1204,6 +1204,23 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog) } } + /* Update per stage block pointers to point to the program list. + * FIXME: We should be able to free the per stage blocks here. 
+ */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) { + int stage_index = + prog->InterfaceBlockStageIndex[i][j]; + + if (stage_index != -1) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + + sh->BufferInterfaceBlocks[stage_index] = + &prog->BufferInterfaceBlocks[j]; + } + } + } + return true; } @@ -2069,9 +2086,15 @@ link_intrastage_shaders(void *mem_ctx, linked->ir = new(linked) exec_list; clone_ir_list(mem_ctx, linked->ir, main->ir); - linked->BufferInterfaceBlocks = uniform_blocks; + linked->BufferInterfaceBlocks = + ralloc_array(linked, gl_uniform_block *, num_uniform_blocks); + + ralloc_steal(linked, uniform_blocks); + for (unsigned i = 0; i < num_uniform_blocks; i++) { + linked->BufferInterfaceBlocks[i] = &uniform_blocks[i]; + } + linked->NumBufferInterfaceBlocks = num_uniform_blocks; - ralloc_steal(linked, linked->BufferInterfaceBlocks); link_fs_input_layout_qualifiers(prog, linked, shader_list, num_shaders); link_tcs_out_layout_qualifiers(prog, linked, shader_list, num_shaders); @@ -2869,7 +2892,8 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog) if (prog->InterfaceBlockStageIndex[j][i] != -1) { struct gl_shader *sh = prog->_LinkedShaders[j]; int stage_index = prog->InterfaceBlockStageIndex[j][i]; - if (sh && sh->BufferInterfaceBlocks[stage_index].IsShaderStorage) { + if (sh && + sh->BufferInterfaceBlocks[stage_index]->IsShaderStorage) { shader_blocks[j]++; total_shader_storage_blocks++; } else { @@ -2986,7 +3010,8 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog) for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) { int stage_index = prog->InterfaceBlockStageIndex[i][j]; - if (stage_index != -1 && sh->BufferInterfaceBlocks[stage_index].IsShaderStorage) + if (stage_index != -1 && + sh->BufferInterfaceBlocks[stage_index]->IsShaderStorage) total_shader_storage_blocks++; } @@ -4006,20 +4031,22 @@ 
link_assign_subroutine_types(struct gl_shader_program *prog) static void split_ubos_and_ssbos(void *mem_ctx, - struct gl_uniform_block *blocks, + struct gl_uniform_block **s_blks, + struct gl_uniform_block *p_blks, unsigned num_blocks, struct gl_uniform_block ***ubos, unsigned *num_ubos, - unsigned **ubo_interface_block_indices, struct gl_uniform_block ***ssbos, - unsigned *num_ssbos, - unsigned **ssbo_interface_block_indices) + unsigned *num_ssbos) { unsigned num_ubo_blocks = 0; unsigned num_ssbo_blocks = 0; + /* Are we spliting the list of blocks for the shader or the program */ + bool is_shader = p_blks == NULL; + for (unsigned i = 0; i < num_blocks; i++) { - if (blocks[i].IsShaderStorage) + if (is_shader ? s_blks[i]->IsShaderStorage : p_blks[i].IsShaderStorage) num_ssbo_blocks++; else num_ubo_blocks++; @@ -4031,24 +4058,13 @@ split_ubos_and_ssbos(void *mem_ctx, *ssbos = ralloc_array(mem_ctx, gl_uniform_block *, num_ssbo_blocks); *num_ssbos = 0; - if (ubo_interface_block_indices) - *ubo_interface_block_indices = - ralloc_array(mem_ctx, unsigned, num_ubo_blocks); - - if (ssbo_interface_block_indices) - *ssbo_interface_block_indices = - ralloc_array(mem_ctx, unsigned, num_ssbo_blocks); - for (unsigned i = 0; i < num_blocks; i++) { - if (blocks[i].IsShaderStorage) { - (*ssbos)[*num_ssbos] = &blocks[i]; - if (ssbo_interface_block_indices) - (*ssbo_interface_block_indices)[*num_ssbos] = i; + struct gl_uniform_block *blk = is_shader ? 
s_blks[i] : &p_blks[i]; + if (blk->IsShaderStorage) { + (*ssbos)[*num_ssbos] = blk; (*num_ssbos)++; } else { - (*ubos)[*num_ubos] = &blocks[i]; - if (ubo_interface_block_indices) - (*ubo_interface_block_indices)[*num_ubos] = i; + (*ubos)[*num_ubos] = blk; (*num_ubos)++; } } @@ -4627,25 +4643,23 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) gl_shader *sh = prog->_LinkedShaders[i]; split_ubos_and_ssbos(sh, sh->BufferInterfaceBlocks, + NULL, sh->NumBufferInterfaceBlocks, &sh->UniformBlocks, &sh->NumUniformBlocks, - NULL, &sh->ShaderStorageBlocks, - &sh->NumShaderStorageBlocks, - NULL); + &sh->NumShaderStorageBlocks); } } split_ubos_and_ssbos(prog, + NULL, prog->BufferInterfaceBlocks, prog->NumBufferInterfaceBlocks, &prog->UniformBlocks, &prog->NumUniformBlocks, - &prog->UboInterfaceBlockIndex, &prog->ShaderStorageBlocks, - &prog->NumShaderStorageBlocks, - &prog->SsboInterfaceBlockIndex); + &prog->NumShaderStorageBlocks); for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { if (prog->_LinkedShaders[i] == NULL) diff --git a/src/compiler/glsl/standalone_scaffolding.cpp b/src/compiler/glsl/standalone_scaffolding.cpp index d5d214b57cc..e350f702099 100644 --- a/src/compiler/glsl/standalone_scaffolding.cpp +++ b/src/compiler/glsl/standalone_scaffolding.cpp @@ -124,11 +124,6 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg) shProg->InterfaceBlockStageIndex[i] = NULL; } - ralloc_free(shProg->UboInterfaceBlockIndex); - shProg->UboInterfaceBlockIndex = NULL; - ralloc_free(shProg->SsboInterfaceBlockIndex); - shProg->SsboInterfaceBlockIndex = NULL; - ralloc_free(shProg->AtomicBuffers); shProg->AtomicBuffers = NULL; shProg->NumAtomicBuffers = 0; diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 399f4508415..f050dddc4e8 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2306,7 +2306,7 @@ struct gl_shader * duplicated. 
*/ unsigned NumBufferInterfaceBlocks; - struct gl_uniform_block *BufferInterfaceBlocks; + struct gl_uniform_block **BufferInterfaceBlocks; unsigned NumUniformBlocks; struct gl_uniform_block **UniformBlocks; @@ -2821,13 +2821,6 @@ struct gl_shader_program */ int *InterfaceBlockStageIndex[MESA_SHADER_STAGES]; - /** - * Indices into the BufferInterfaceBlocks[] array for Uniform Buffer - * Objects and Shader Storage Buffer Objects. - */ - unsigned *UboInterfaceBlockIndex; - unsigned *SsboInterfaceBlockIndex; - /** * Map of active uniform names to locations * diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index b1968b3f795..7dcbdccf442 100644 --- a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -1018,26 +1018,11 @@ _mesa_UniformBlockBinding(GLuint program, if (shProg->UniformBlocks[uniformBlockIndex]->Binding != uniformBlockBinding) { - int i; FLUSH_VERTICES(ctx, 0); ctx->NewDriverState |= ctx->DriverFlags.NewUniformBuffer; - const int interface_block_index = - shProg->UboInterfaceBlockIndex[uniformBlockIndex]; - - shProg->BufferInterfaceBlocks[interface_block_index].Binding = - uniformBlockBinding; - - for (i = 0; i < MESA_SHADER_STAGES; i++) { - int stage_index = - shProg->InterfaceBlockStageIndex[i][interface_block_index]; - - if (stage_index != -1) { - struct gl_shader *sh = shProg->_LinkedShaders[i]; - sh->BufferInterfaceBlocks[stage_index].Binding = uniformBlockBinding; - } - } + shProg->UniformBlocks[uniformBlockIndex]->Binding = uniformBlockBinding; } } @@ -1076,26 +1061,12 @@ _mesa_ShaderStorageBlockBinding(GLuint program, if (shProg->ShaderStorageBlocks[shaderStorageBlockIndex]->Binding != shaderStorageBlockBinding) { - int i; FLUSH_VERTICES(ctx, 0); ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer; - const int interface_block_index = - shProg->SsboInterfaceBlockIndex[shaderStorageBlockIndex]; - - shProg->BufferInterfaceBlocks[interface_block_index].Binding = + 
shProg->ShaderStorageBlocks[shaderStorageBlockIndex]->Binding = shaderStorageBlockBinding; - - for (i = 0; i < MESA_SHADER_STAGES; i++) { - int stage_index = - shProg->InterfaceBlockStageIndex[i][interface_block_index]; - - if (stage_index != -1) { - struct gl_shader *sh = shProg->_LinkedShaders[i]; - sh->BufferInterfaceBlocks[stage_index].Binding = shaderStorageBlockBinding; - } - } } } -- cgit v1.2.3 From 2df141087a56da20ad50b379b4a63426b870344a Mon Sep 17 00:00:00 2001 From: Edward O'Callaghan Date: Sat, 26 Mar 2016 18:35:07 +1100 Subject: mesa/st: Remove GLSLVersion clamping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While here, remove intermediate glsl_feature_level variable. Signed-off-by: Edward O'Callaghan Signed-off-by: Marek Olšák --- src/mesa/state_tracker/st_extensions.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 2fdaba073a2..0d6c6b196a1 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -552,7 +552,6 @@ void st_init_extensions(struct pipe_screen *screen, boolean has_lib_dxtc) { unsigned i; - int glsl_feature_level; GLboolean *extension_table = (GLboolean *) extensions; static const struct st_extension_cap_mapping cap_mapping[] = { @@ -844,12 +843,8 @@ void st_init_extensions(struct pipe_screen *screen, ARRAY_SIZE(vertex_mapping), PIPE_BUFFER, PIPE_BIND_VERTEX_BUFFER); - /* Figure out GLSL support. */ - glsl_feature_level = screen->get_param(screen, PIPE_CAP_GLSL_FEATURE_LEVEL); - - consts->GLSLVersion = glsl_feature_level; - if (glsl_feature_level >= 410) - consts->GLSLVersion = 410; + /* Figure out GLSL support and set GLSLVersion to it. 
*/ + consts->GLSLVersion = screen->get_param(screen, PIPE_CAP_GLSL_FEATURE_LEVEL); _mesa_override_glsl_version(consts); @@ -858,9 +853,9 @@ void st_init_extensions(struct pipe_screen *screen, consts->ForceGLSLVersion = options->force_glsl_version; } - if (glsl_feature_level >= 400) + if (consts->GLSLVersion >= 400) extensions->ARB_gpu_shader5 = GL_TRUE; - if (glsl_feature_level >= 410) + if (consts->GLSLVersion >= 410) extensions->ARB_shader_precision = GL_TRUE; /* This extension needs full OpenGL 3.2, but we don't know if that's @@ -1036,7 +1031,7 @@ void st_init_extensions(struct pipe_screen *screen, consts->MaxViewports = screen->get_param(screen, PIPE_CAP_MAX_VIEWPORTS); if (consts->MaxViewports >= 16) { - if (glsl_feature_level >= 400) { + if (consts->GLSLVersion >= 400) { consts->ViewportBounds.Min = -32768.0; consts->ViewportBounds.Max = 32767.0; } else { -- cgit v1.2.3 From e2d5a6fac5c2b433cd78c0fc29b420b36c429cb9 Mon Sep 17 00:00:00 2001 From: Miklós Máté Date: Thu, 24 Mar 2016 01:12:55 +0100 Subject: mesa: optionally associate a gl_program to ATI_fragment_shader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the state tracker will use it Acked-by: Brian Paul Reviewed-by: Ian Romanick Signed-off-by: Miklós Máté Signed-off-by: Marek Olšák --- src/mesa/drivers/common/driverfuncs.c | 3 +++ src/mesa/main/atifragshader.c | 13 ++++++++++++- src/mesa/main/dd.h | 5 +++++ src/mesa/main/mtypes.h | 1 + src/mesa/main/state.c | 14 +++++++++++++- 5 files changed, 34 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index e96f92af5bb..2730b7b2f2a 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -117,6 +117,9 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->NewProgram = _mesa_new_program; driver->DeleteProgram = _mesa_delete_program; + /* ATI_fragment_shader */ + 
driver->NewATIfs = NULL; + /* simple state commands */ driver->AlphaFunc = NULL; driver->BlendColor = NULL; diff --git a/src/mesa/main/atifragshader.c b/src/mesa/main/atifragshader.c index 8fcbff6a7a4..34f45c68008 100644 --- a/src/mesa/main/atifragshader.c +++ b/src/mesa/main/atifragshader.c @@ -30,6 +30,7 @@ #include "main/mtypes.h" #include "main/dispatch.h" #include "main/atifragshader.h" +#include "program/program.h" #define MESA_DEBUG_ATI_FS 0 @@ -63,6 +64,7 @@ _mesa_delete_ati_fragment_shader(struct gl_context *ctx, struct ati_fragment_sha free(s->Instructions[i]); free(s->SetupInst[i]); } + _mesa_reference_program(ctx, &s->Program, NULL); free(s); } @@ -321,6 +323,8 @@ _mesa_BeginFragmentShaderATI(void) free(ctx->ATIFragmentShader.Current->SetupInst[i]); } + _mesa_reference_program(ctx, &ctx->ATIFragmentShader.Current->Program, NULL); + /* malloc the instructions here - not sure if the best place but its a start */ for (i = 0; i < MAX_NUM_PASSES_ATI; i++) { @@ -405,7 +409,14 @@ _mesa_EndFragmentShaderATI(void) } #endif - if (!ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_SHADER_ATI, NULL)) { + if (ctx->Driver.NewATIfs) { + struct gl_program *prog = ctx->Driver.NewATIfs(ctx, + ctx->ATIFragmentShader.Current); + _mesa_reference_program(ctx, &ctx->ATIFragmentShader.Current->Program, prog); + } + + if (!ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_SHADER_ATI, + curProg->Program)) { ctx->ATIFragmentShader.Current->isValid = GL_FALSE; /* XXX is this the right error? 
*/ _mesa_error(ctx, GL_INVALID_OPERATION, diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 60bc8ef4411..d62fee690f4 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -476,6 +476,11 @@ struct dd_function_table { GLuint id); /** Delete a program */ void (*DeleteProgram)(struct gl_context *ctx, struct gl_program *prog); + /** + * Allocate a program to associate with the new ATI fragment shader (optional) + */ + struct gl_program * (*NewATIfs)(struct gl_context *ctx, + struct ati_fragment_shader *curProg); /** * Notify driver that a program string (and GPU code) has been specified * or modified. Return GL_TRUE or GL_FALSE to indicate if the program is diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index f050dddc4e8..c2c86a6e0d1 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2196,6 +2196,7 @@ struct ati_fragment_shader GLboolean interpinp1; GLboolean isValid; GLuint swizzlerq; + struct gl_program *Program; }; /** diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 917ae4da023..bf6035e0142 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -124,7 +124,8 @@ update_program(struct gl_context *ctx) * follows: * 1. OpenGL 2.0/ARB vertex/fragment shaders * 2. ARB/NV vertex/fragment programs - * 3. Programs derived from fixed-function state. + * 3. ATI fragment shader + * 4. Programs derived from fixed-function state. 
* * Note: it's possible for a vertex shader to get used with a fragment * program (and vice versa) here, but in practice that shouldn't ever @@ -152,6 +153,17 @@ update_program(struct gl_context *ctx) _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._TexEnvProgram, NULL); } + else if (ctx->ATIFragmentShader._Enabled && + ctx->ATIFragmentShader.Current->Program) { + /* Use the enabled ATI fragment shader's associated program */ + _mesa_reference_shader_program(ctx, + &ctx->_Shader->_CurrentFragmentProgram, + NULL); + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, + gl_fragment_program(ctx->ATIFragmentShader.Current->Program)); + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._TexEnvProgram, + NULL); + } else if (ctx->FragmentProgram._MaintainTexEnvProgram) { /* Use fragment program generated from fixed-function state */ struct gl_shader_program *f = _mesa_get_fixed_func_fragment_program(ctx); -- cgit v1.2.3 From d71c1e9e54d379ff312dca7eb4d717e3f20e4099 Mon Sep 17 00:00:00 2001 From: Miklós Máté Date: Thu, 24 Mar 2016 01:12:56 +0100 Subject: program: add ATI_fragment_shader to shader stages list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miklós Máté Signed-off-by: Marek Olšák --- src/mesa/program/program.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h index 24e05974dc3..09e69280d46 100644 --- a/src/mesa/program/program.h +++ b/src/mesa/program/program.h @@ -172,6 +172,8 @@ _mesa_program_enum_to_shader_stage(GLenum v) return MESA_SHADER_VERTEX; case GL_FRAGMENT_PROGRAM_ARB: return MESA_SHADER_FRAGMENT; + case GL_FRAGMENT_SHADER_ATI: + return MESA_SHADER_FRAGMENT; case GL_GEOMETRY_PROGRAM_NV: return MESA_SHADER_GEOMETRY; case GL_TESS_CONTROL_PROGRAM_NV: -- cgit v1.2.3 From dee274477fb36aebc0ebf3eb1a9c58d875ad7a6f Mon Sep 17 00:00:00 2001 From: Miklós Máté Date: Thu, 24 Mar 2016 01:12:57 +0100 Subject: 
st/mesa: implement GL_ATI_fragment_shader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: fix arithmetic for special opcodes, fix fog state, cleanup v3: simplify handling of special opcodes, fix rebinding with different textargets or fog equation, lots of formatting fixes v4: adapt to the compile early, fix later architecture, formatting fixes Signed-off-by: Miklós Máté Signed-off-by: Marek Olšák --- src/mesa/Makefile.sources | 1 + src/mesa/main/atifragshader.h | 1 + src/mesa/state_tracker/st_atifs_to_tgsi.c | 845 ++++++++++++++++++++++++++++++ src/mesa/state_tracker/st_atifs_to_tgsi.h | 67 +++ src/mesa/state_tracker/st_atom_constbuf.c | 15 + src/mesa/state_tracker/st_atom_shader.c | 65 ++- src/mesa/state_tracker/st_cb_drawpixels.c | 1 + src/mesa/state_tracker/st_cb_program.c | 31 ++ src/mesa/state_tracker/st_program.c | 34 +- src/mesa/state_tracker/st_program.h | 8 + 10 files changed, 1064 insertions(+), 4 deletions(-) create mode 100644 src/mesa/state_tracker/st_atifs_to_tgsi.c create mode 100644 src/mesa/state_tracker/st_atifs_to_tgsi.h (limited to 'src/mesa') diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index a6c12c64828..54601a956fd 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -395,6 +395,7 @@ VBO_FILES = \ vbo/vbo_split_inplace.c STATETRACKER_FILES = \ + state_tracker/st_atifs_to_tgsi.c \ state_tracker/st_atom_array.c \ state_tracker/st_atom_atomicbuf.c \ state_tracker/st_atom_blend.c \ diff --git a/src/mesa/main/atifragshader.h b/src/mesa/main/atifragshader.h index 59011341018..0e32795da3b 100644 --- a/src/mesa/main/atifragshader.h +++ b/src/mesa/main/atifragshader.h @@ -16,6 +16,7 @@ struct gl_context; #define MAX_NUM_INSTRUCTIONS_PER_PASS_ATI 8 #define MAX_NUM_PASSES_ATI 2 #define MAX_NUM_FRAGMENT_REGISTERS_ATI 6 +#define MAX_NUM_FRAGMENT_CONSTANTS_ATI 8 struct ati_fs_opcode_st { diff --git a/src/mesa/state_tracker/st_atifs_to_tgsi.c 
b/src/mesa/state_tracker/st_atifs_to_tgsi.c new file mode 100644 index 00000000000..66f442aee5a --- /dev/null +++ b/src/mesa/state_tracker/st_atifs_to_tgsi.c @@ -0,0 +1,845 @@ +/* + * Copyright (C) 2016 Miklós Máté + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "main/mtypes.h" +#include "main/atifragshader.h" +#include "main/errors.h" +#include "program/prog_parameter.h" + +#include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_transform.h" + +#include "st_program.h" +#include "st_atifs_to_tgsi.h" + +/** + * Intermediate state used during shader translation. 
+ */ +struct st_translate { + struct ureg_program *ureg; + struct ati_fragment_shader *atifs; + + struct ureg_dst temps[MAX_PROGRAM_TEMPS]; + struct ureg_src *constants; + struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; + struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; + struct ureg_src samplers[PIPE_MAX_SAMPLERS]; + + const GLuint *inputMapping; + const GLuint *outputMapping; + + unsigned current_pass; + + bool regs_written[MAX_NUM_PASSES_ATI][MAX_NUM_FRAGMENT_REGISTERS_ATI]; + + boolean error; +}; + +struct instruction_desc { + unsigned TGSI_opcode; + const char *name; + unsigned char arg_count; +}; + +static const struct instruction_desc inst_desc[] = { + {TGSI_OPCODE_MOV, "MOV", 1}, + {TGSI_OPCODE_NOP, "UND", 0}, /* unused */ + {TGSI_OPCODE_ADD, "ADD", 2}, + {TGSI_OPCODE_MUL, "MUL", 2}, + {TGSI_OPCODE_SUB, "SUB", 2}, + {TGSI_OPCODE_DP3, "DOT3", 2}, + {TGSI_OPCODE_DP4, "DOT4", 2}, + {TGSI_OPCODE_MAD, "MAD", 3}, + {TGSI_OPCODE_LRP, "LERP", 3}, + {TGSI_OPCODE_NOP, "CND", 3}, + {TGSI_OPCODE_NOP, "CND0", 3}, + {TGSI_OPCODE_NOP, "DOT2_ADD", 3} +}; + +static struct ureg_dst +get_temp(struct st_translate *t, unsigned index) +{ + if (ureg_dst_is_undef(t->temps[index])) + t->temps[index] = ureg_DECL_temporary(t->ureg); + return t->temps[index]; +} + +static struct ureg_src +apply_swizzle(struct st_translate *t, + struct ureg_src src, GLuint swizzle) +{ + if (swizzle == GL_SWIZZLE_STR_ATI) { + return src; + } else if (swizzle == GL_SWIZZLE_STQ_ATI) { + return ureg_swizzle(src, + TGSI_SWIZZLE_X, + TGSI_SWIZZLE_Y, + TGSI_SWIZZLE_W, + TGSI_SWIZZLE_Z); + } else { + struct ureg_dst tmp[2]; + struct ureg_src imm[3]; + + tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); + tmp[1] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 1); + imm[0] = src; + imm[1] = ureg_imm4f(t->ureg, 1.0f, 1.0f, 0.0f, 0.0f); + imm[2] = ureg_imm4f(t->ureg, 0.0f, 0.0f, 1.0f, 1.0f); + ureg_insn(t->ureg, TGSI_OPCODE_MAD, &tmp[0], 1, imm, 3); + + if (swizzle == GL_SWIZZLE_STR_DR_ATI) { + imm[0] = 
ureg_scalar(src, TGSI_SWIZZLE_Z); + } else { + imm[0] = ureg_scalar(src, TGSI_SWIZZLE_W); + } + ureg_insn(t->ureg, TGSI_OPCODE_RCP, &tmp[1], 1, &imm[0], 1); + + imm[0] = ureg_src(tmp[0]); + imm[1] = ureg_src(tmp[1]); + ureg_insn(t->ureg, TGSI_OPCODE_MUL, &tmp[0], 1, imm, 2); + + return ureg_src(tmp[0]); + } +} + +static struct ureg_src +get_source(struct st_translate *t, GLuint src_type) +{ + if (src_type >= GL_REG_0_ATI && src_type <= GL_REG_5_ATI) { + if (t->regs_written[t->current_pass][src_type - GL_REG_0_ATI]) { + return ureg_src(get_temp(t, src_type - GL_REG_0_ATI)); + } else { + return ureg_imm1f(t->ureg, 0.0f); + } + } else if (src_type >= GL_CON_0_ATI && src_type <= GL_CON_7_ATI) { + return t->constants[src_type - GL_CON_0_ATI]; + } else if (src_type == GL_ZERO) { + return ureg_imm1f(t->ureg, 0.0f); + } else if (src_type == GL_ONE) { + return ureg_imm1f(t->ureg, 1.0f); + } else if (src_type == GL_PRIMARY_COLOR_ARB) { + return t->inputs[t->inputMapping[VARYING_SLOT_COL0]]; + } else if (src_type == GL_SECONDARY_INTERPOLATOR_ATI) { + return t->inputs[t->inputMapping[VARYING_SLOT_COL1]]; + } else { + /* frontend prevents this */ + unreachable("unknown source"); + } +} + +static struct ureg_src +prepare_argument(struct st_translate *t, const unsigned argId, + const struct atifragshader_src_register *srcReg) +{ + struct ureg_src src = get_source(t, srcReg->Index); + struct ureg_dst arg = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + argId); + + switch (srcReg->argRep) { + case GL_NONE: + break; + case GL_RED: + src = ureg_scalar(src, TGSI_SWIZZLE_X); + break; + case GL_GREEN: + src = ureg_scalar(src, TGSI_SWIZZLE_Y); + break; + case GL_BLUE: + src = ureg_scalar(src, TGSI_SWIZZLE_Z); + break; + case GL_ALPHA: + src = ureg_scalar(src, TGSI_SWIZZLE_W); + break; + } + ureg_insn(t->ureg, TGSI_OPCODE_MOV, &arg, 1, &src, 1); + + if (srcReg->argMod & GL_COMP_BIT_ATI) { + struct ureg_src modsrc[2]; + modsrc[0] = ureg_imm1f(t->ureg, 1.0f); + modsrc[1] = ureg_src(arg); + + 
ureg_insn(t->ureg, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2); + } + if (srcReg->argMod & GL_BIAS_BIT_ATI) { + struct ureg_src modsrc[2]; + modsrc[0] = ureg_src(arg); + modsrc[1] = ureg_imm1f(t->ureg, 0.5f); + + ureg_insn(t->ureg, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2); + } + if (srcReg->argMod & GL_2X_BIT_ATI) { + struct ureg_src modsrc[2]; + modsrc[0] = ureg_src(arg); + modsrc[1] = ureg_src(arg); + + ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2); + } + if (srcReg->argMod & GL_NEGATE_BIT_ATI) { + struct ureg_src modsrc[2]; + modsrc[0] = ureg_src(arg); + modsrc[1] = ureg_imm1f(t->ureg, -1.0f); + + ureg_insn(t->ureg, TGSI_OPCODE_MUL, &arg, 1, modsrc, 2); + } + return ureg_src(arg); +} + +/* These instructions need special treatment */ +static void +emit_special_inst(struct st_translate *t, const struct instruction_desc *desc, + struct ureg_dst *dst, struct ureg_src *args, unsigned argcount) +{ + struct ureg_dst tmp[1]; + struct ureg_src src[3]; + + if (!strcmp(desc->name, "CND")) { + tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 2); /* re-purpose a3 */ + src[0] = ureg_imm1f(t->ureg, 0.5f); + src[1] = args[2]; + ureg_insn(t->ureg, TGSI_OPCODE_SUB, tmp, 1, src, 2); + src[0] = ureg_src(tmp[0]); + src[1] = args[0]; + src[2] = args[1]; + ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3); + } else if (!strcmp(desc->name, "CND0")) { + src[0] = args[2]; + src[1] = args[1]; + src[2] = args[0]; + ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3); + } else if (!strcmp(desc->name, "DOT2_ADD")) { + /* note: DP2A is not implemented in most pipe drivers */ + tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); /* re-purpose a1 */ + src[0] = args[0]; + src[1] = args[1]; + ureg_insn(t->ureg, TGSI_OPCODE_DP2, tmp, 1, src, 2); + src[0] = ureg_src(tmp[0]); + src[1] = ureg_scalar(args[2], TGSI_SWIZZLE_Z); + ureg_insn(t->ureg, TGSI_OPCODE_ADD, dst, 1, src, 2); + } +} + +static void +emit_arith_inst(struct st_translate *t, + const struct instruction_desc *desc, + struct 
ureg_dst *dst, struct ureg_src *args, unsigned argcount) +{ + if (desc->TGSI_opcode == TGSI_OPCODE_NOP) { + return emit_special_inst(t, desc, dst, args, argcount); + } + + ureg_insn(t->ureg, desc->TGSI_opcode, dst, 1, args, argcount); +} + +static void +emit_dstmod(struct st_translate *t, + struct ureg_dst dst, GLuint dstMod) +{ + float imm; + struct ureg_src src[3]; + GLuint scale = dstMod & ~GL_SATURATE_BIT_ATI; + + if (dstMod == GL_NONE) { + return; + } + + switch (scale) { + case GL_2X_BIT_ATI: + imm = 2.0f; + break; + case GL_4X_BIT_ATI: + imm = 4.0f; + break; + case GL_8X_BIT_ATI: + imm = 8.0f; + break; + case GL_HALF_BIT_ATI: + imm = 0.5f; + break; + case GL_QUARTER_BIT_ATI: + imm = 0.25f; + break; + case GL_EIGHTH_BIT_ATI: + imm = 0.125f; + break; + default: + imm = 1.0f; + } + + src[0] = ureg_src(dst); + src[1] = ureg_imm1f(t->ureg, imm); + if (dstMod & GL_SATURATE_BIT_ATI) { + dst = ureg_saturate(dst); + } + ureg_insn(t->ureg, TGSI_OPCODE_MUL, &dst, 1, src, 2); +} + +/** + * Compile one setup instruction to TGSI instructions. 
+ */ +static void +compile_setupinst(struct st_translate *t, + const unsigned r, + const struct atifs_setupinst *texinst) +{ + struct ureg_dst dst[1]; + struct ureg_src src[2]; + + if (!texinst->Opcode) + return; + + dst[0] = get_temp(t, r); + + GLuint pass_tex = texinst->src; + + if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) { + unsigned attr = pass_tex - GL_TEXTURE0_ARB + VARYING_SLOT_TEX0; + + src[0] = t->inputs[t->inputMapping[attr]]; + } else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) { + unsigned reg = pass_tex - GL_REG_0_ATI; + + /* the frontend already validated that REG is only allowed in second pass */ + if (t->regs_written[0][reg]) { + src[0] = ureg_src(t->temps[reg]); + } else { + src[0] = ureg_imm1f(t->ureg, 0.0f); + } + } + src[0] = apply_swizzle(t, src[0], texinst->swizzle); + + if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) { + /* by default texture and sampler indexes are the same */ + src[1] = t->samplers[r]; + /* the texture target is still unknown, it will be fixed in the draw call */ + ureg_tex_insn(t->ureg, TGSI_OPCODE_TEX, dst, 1, TGSI_TEXTURE_2D, + NULL, 0, src, 2); + } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) { + ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1); + } + + t->regs_written[t->current_pass][r] = true; +} + +/** + * Compile one arithmetic operation COLOR&ALPHA pair into TGSI instructions. 
+ * + * The color half (optype 0) and the alpha half (optype 1) are translated + * independently; a half whose Opcode is 0 is skipped. Each emitted half + * marks its destination register as written in the current pass. + */ +static void +compile_instruction(struct st_translate *t, + const struct atifs_instruction *inst) +{ + unsigned optype; + + for (optype = 0; optype < 2; optype++) { /* color, alpha */ + const struct instruction_desc *desc; + struct ureg_dst dst[1]; + struct ureg_src args[3]; /* arguments for the main operation */ + unsigned arg; + unsigned dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI; + + if (!inst->Opcode[optype]) + continue; + + desc = &inst_desc[inst->Opcode[optype] - GL_MOV_ATI]; /* the descriptor table is indexed relative to GL_MOV_ATI */ + + /* prepare the arguments */ + for (arg = 0; arg < desc->arg_count; arg++) { + if (arg >= inst->ArgCount[optype]) { + _mesa_warning(0, "Using 0 for missing argument %d of %s\n", + arg, desc->name); + args[arg] = ureg_imm1f(t->ureg, 0.0f); + } else { + args[arg] = prepare_argument(t, arg, + &inst->SrcReg[optype][arg]); + } + } + + /* prepare dst */ + dst[0] = get_temp(t, dstreg); + + if (optype) { /* the alpha half only ever writes W */ + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_W); + } else { + GLuint dstMask = inst->DstReg[optype].dstMask; + if (dstMask == GL_NONE) { + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ); + } else { + dst[0] = ureg_writemask(dst[0], dstMask); /* the enum values match */ + } + } + + /* emit the main instruction */ + emit_arith_inst(t, desc, dst, args, arg); /* arg equals desc->arg_count once the loop above has finished */ + + emit_dstmod(t, *dst, inst->DstReg[optype].dstMod); + + t->regs_written[t->current_pass][dstreg] = true; + } +} +/** + * Emit the epilogue of the shader: if temporary 0 was written in the last + * pass it is copied to the FRAG_RESULT_COLOR output, then END is emitted. + * numPasses - 1 is used as an array index, so numPasses must be at least 1. + */ +static void +finalize_shader(struct st_translate *t, unsigned numPasses) +{ + struct ureg_dst dst[1] = { { 0 } }; + struct ureg_src src[1] = { { 0 } }; + + if (t->regs_written[numPasses-1][0]) { + /* copy the result into the OUT slot */ + dst[0] = t->outputs[t->outputMapping[FRAG_RESULT_COLOR]]; + src[0] = ureg_src(t->temps[0]); + ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1); + } + + /* signal the end of the program */ + ureg_insn(t->ureg, TGSI_OPCODE_END, dst, 0, src, 0); /* both operand counts are 0 */ +} + +/** + * Called when a new variant is needed, we need to translate + * the ATI fragment shader to TGSI + */ +enum 
pipe_error +st_translate_atifs_program( + struct ureg_program *ureg, + struct ati_fragment_shader *atifs, + struct gl_program *program, + GLuint numInputs, + const GLuint inputMapping[], + const ubyte inputSemanticName[], + const ubyte inputSemanticIndex[], + const GLuint interpMode[], + GLuint numOutputs, + const GLuint outputMapping[], + const ubyte outputSemanticName[], + const ubyte outputSemanticIndex[]) +{ + enum pipe_error ret = PIPE_OK; /* only changed to PIPE_ERROR_OUT_OF_MEMORY when t->constants cannot be allocated */ + + unsigned pass, i, r; + + struct st_translate translate, *t; + t = &translate; + memset(t, 0, sizeof *t); /* every field not assigned below starts out zero/NULL */ + + t->inputMapping = inputMapping; + t->outputMapping = outputMapping; + t->ureg = ureg; + t->atifs = atifs; + + /* + * Declare input attributes. + */ + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_fs_input(ureg, + inputSemanticName[i], + inputSemanticIndex[i], + interpMode[i]); + } + + /* + * Declare output attributes: + * we always have numOutputs=1 and it's FRAG_RESULT_COLOR + * (numOutputs and outputSemanticName are not read by this function) + */ + t->outputs[0] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_COLOR, + outputSemanticIndex[0]); + + /* Emit constants and immediates. Mesa uses a single index space + * for these, so we put all the translated regs in t->constants. 
+ */ + if (program->Parameters) { + t->constants = calloc(program->Parameters->NumParameters, + sizeof t->constants[0]); + if (t->constants == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + + for (i = 0; i < program->Parameters->NumParameters; i++) { + switch (program->Parameters->Parameters[i].Type) { + case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: + t->constants[i] = ureg_DECL_constant(ureg, i); + break; + case PROGRAM_CONSTANT: + t->constants[i] = + ureg_DECL_immediate(ureg, + (const float*)program->Parameters->ParameterValues[i], + 4); + break; + default: /* other parameter types keep the zeroed entry from calloc */ + break; + } + } + } + + /* texture samplers */ + for (i = 0; i < MAX_NUM_FRAGMENT_REGISTERS_ATI; i++) { + if (program->SamplersUsed & (1 << i)) { + t->samplers[i] = ureg_DECL_sampler(ureg, i); + /* the texture target is still unknown, it will be fixed in the draw call */ + ureg_DECL_sampler_view(ureg, i, TGSI_TEXTURE_2D, + TGSI_RETURN_TYPE_FLOAT, + TGSI_RETURN_TYPE_FLOAT, + TGSI_RETURN_TYPE_FLOAT, + TGSI_RETURN_TYPE_FLOAT); + } + } + + /* emit instructions: per pass, all setup instructions first, then the arithmetic ones */ + for (pass = 0; pass < atifs->NumPasses; pass++) { + t->current_pass = pass; + for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) { + struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r]; + compile_setupinst(t, r, texinst); + } + for (i = 0; i < atifs->numArithInstr[pass]; i++) { + struct atifs_instruction *inst = &atifs->Instructions[pass][i]; + compile_instruction(t, inst); + } + } + + finalize_shader(t, atifs->NumPasses); + +out: + free(t->constants); /* NULL, and therefore a no-op, when nothing was allocated */ + + if (t->error) { + debug_printf("%s: translate error flag set\n", __func__); + } + + return ret; +} + +/** + * Called in ProgramStringNotify, we need to fill the metadata of the + * gl_program attached to the ati_fragment_shader + */ +void +st_init_atifs_prog(struct gl_context *ctx, struct gl_program *prog) +{ + /* we know this is st_fragment_program, because of st_new_ati_fs() */ + struct st_fragment_program *stfp = (struct st_fragment_program *) prog; + struct 
ati_fragment_shader *atifs = stfp->ati_fs; + + unsigned pass, i, r, optype, arg; + + static const gl_state_index fog_params_state[STATE_LENGTH] = + {STATE_INTERNAL, STATE_FOG_PARAMS_OPTIMIZED, 0, 0, 0}; + static const gl_state_index fog_color[STATE_LENGTH] = + {STATE_FOG_COLOR, 0, 0, 0, 0}; + + prog->InputsRead = 0; + prog->OutputsWritten = BITFIELD64_BIT(FRAG_RESULT_COLOR); + prog->SamplersUsed = 0; + prog->Parameters = _mesa_new_parameter_list(); + + /* fill in InputsRead, SamplersUsed, TexturesUsed */ + for (pass = 0; pass < atifs->NumPasses; pass++) { + for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) { + struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r]; + GLuint pass_tex = texinst->src; + + if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) { + /* mark which texcoords are used */ + prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB); + /* by default there is 1:1 mapping between samplers and textures */ + prog->SamplersUsed |= (1 << r); + /* the target is unknown here, it will be fixed in the draw call */ + prog->TexturesUsed[r] = TEXTURE_2D_BIT; + } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) { + if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) { + prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB); + } + } + } + } + for (pass = 0; pass < atifs->NumPasses; pass++) { + for (i = 0; i < atifs->numArithInstr[pass]; i++) { + struct atifs_instruction *inst = &atifs->Instructions[pass][i]; + + for (optype = 0; optype < 2; optype++) { /* color, alpha */ + if (inst->Opcode[optype]) { + for (arg = 0; arg < inst->ArgCount[optype]; arg++) { + GLint index = inst->SrcReg[optype][arg].Index; + if (index == GL_PRIMARY_COLOR_EXT) { + prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_COL0); + } else if (index == GL_SECONDARY_INTERPOLATOR_ATI) { + /* note: ATI_fragment_shader.txt never specifies what + * GL_SECONDARY_INTERPOLATOR_ATI is, swrast uses + * VARYING_SLOT_COL1 
for this input */ + prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_COL1); + } + } + } + } + } + } + /* we may need fog */ + prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_FOGC); + + /* we always have the ATI_fs constants, and the fog params */ + for (i = 0; i < MAX_NUM_FRAGMENT_CONSTANTS_ATI; i++) { + _mesa_add_parameter(prog->Parameters, PROGRAM_UNIFORM, + NULL, 4, GL_FLOAT, NULL, NULL); + } + _mesa_add_state_reference(prog->Parameters, fog_params_state); + _mesa_add_state_reference(prog->Parameters, fog_color); + + prog->NumInstructions = 0; + prog->NumTemporaries = MAX_NUM_FRAGMENT_REGISTERS_ATI + 3; /* 3 input temps for arith ops */ + prog->NumParameters = MAX_NUM_FRAGMENT_CONSTANTS_ATI + 2; /* 2 state variables for fog */ +} + + +struct tgsi_atifs_transform { + struct tgsi_transform_context base; + struct tgsi_shader_info info; + const struct st_fp_variant_key *key; + bool first_instruction_emitted; + unsigned fog_factor_temp; + unsigned fog_clamp_imm; +}; + +static inline struct tgsi_atifs_transform * +tgsi_atifs_transform(struct tgsi_transform_context *tctx) +{ + return (struct tgsi_atifs_transform *)tctx; +} + +/* copied from st_cb_drawpixels_shader.c */ +static void +set_src(struct tgsi_full_instruction *inst, unsigned i, unsigned file, unsigned index, + unsigned x, unsigned y, unsigned z, unsigned w) +{ + inst->Src[i].Register.File = file; + inst->Src[i].Register.Index = index; + inst->Src[i].Register.SwizzleX = x; + inst->Src[i].Register.SwizzleY = y; + inst->Src[i].Register.SwizzleZ = z; + inst->Src[i].Register.SwizzleW = w; +} + +#define SET_SRC(inst, i, file, index, x, y, z, w) \ + set_src(inst, i, file, index, TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, \ + TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w) + +static void +transform_decl(struct tgsi_transform_context *tctx, + struct tgsi_full_declaration *decl) +{ + struct tgsi_atifs_transform *ctx = tgsi_atifs_transform(tctx); + + if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { + /* fix texture target */ + 
unsigned newtarget = ctx->key->texture_targets[decl->Range.First]; + if (newtarget) + decl->SamplerView.Resource = newtarget; + } + + tctx->emit_declaration(tctx, decl); +} +/** + * Instruction callback of the fix-up pass. Ahead of the first instruction it + * declares the fog temporary and the clamp immediate (only when key->fog is + * set). A TEX instruction gets its texture target from the variant key; with + * fog enabled, a MOV into an output register is preceded by the fog factor + * computation and by the blend of temporary 0 with the fog color. The + * incoming instruction itself is always emitted last. + */ +static void +transform_instr(struct tgsi_transform_context *tctx, + struct tgsi_full_instruction *current_inst) +{ + struct tgsi_atifs_transform *ctx = tgsi_atifs_transform(tctx); + + if (ctx->first_instruction_emitted) + goto transform_inst; + + ctx->first_instruction_emitted = true; + + if (ctx->key->fog) { + /* add a new temp for the fog factor */ + ctx->fog_factor_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 1; + tgsi_transform_temp_decl(tctx, ctx->fog_factor_temp); + + /* add immediates for clamp */ + ctx->fog_clamp_imm = ctx->info.immediate_count; + tgsi_transform_immediate_decl(tctx, 1.0f, 0.0f, 0.0f, 0.0f); + } + +transform_inst: + if (current_inst->Instruction.Opcode == TGSI_OPCODE_TEX) { + /* fix texture target; a key entry of 0 leaves the instruction as it is */ + unsigned newtarget = ctx->key->texture_targets[current_inst->Src[1].Register.Index]; + if (newtarget) + current_inst->Texture.Texture = newtarget; + + } else if (ctx->key->fog && current_inst->Instruction.Opcode == TGSI_OPCODE_MOV && + current_inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { + struct tgsi_full_instruction inst; + unsigned i; + int fogc_index = -1; + + /* find FOGC input */ + for (i = 0; i < ctx->info.num_inputs; i++) { + if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FOG) { + fogc_index = i; + break; + } + } + if (fogc_index < 0) { + /* should never be reached, because fog coord input is always declared */ + tctx->emit_instruction(tctx, current_inst); + return; + } + + /* compute the 1 component fog factor f */ + if (ctx->key->fog == 1) { + /* LINEAR formula: f = (end - z) / (end - start) + * with optimized parameters: + * f = MAD(fogcoord, oparams.x, oparams.y) + * oparams is constant MAX_NUM_FRAGMENT_CONSTANTS_ATI, i.e. the first + * state reference st_init_atifs_prog() adds after the ATI_fs constants + */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MAD; + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + 
inst.Dst[0].Register.Index = ctx->fog_factor_temp; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + inst.Instruction.NumSrcRegs = 3; + SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W); + SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, X, X, X, X); + SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, Y, Y, Y, Y); + tctx->emit_instruction(tctx, &inst); + } else if (ctx->key->fog == 2) { + /* EXP formula: f = exp(-dens * z) + * with optimized parameters: + * f = MUL(fogcoord, oparams.z); f= EX2(-f) + */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + inst.Dst[0].Register.Index = ctx->fog_factor_temp; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + inst.Instruction.NumSrcRegs = 2; + SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W); + SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, Z, Z, Z, Z); + tctx->emit_instruction(tctx, &inst); + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_EX2; + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + inst.Dst[0].Register.Index = ctx->fog_factor_temp; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + inst.Instruction.NumSrcRegs = 1; + SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W); + inst.Src[0].Register.Negate = 1; + tctx->emit_instruction(tctx, &inst); + } else if (ctx->key->fog == 3) { + /* EXP2 formula: f = exp(-(dens * z)^2) + * with optimized parameters: + * f = MUL(fogcoord, oparams.w); f=MUL(f, f); f= EX2(-f) + */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + inst.Dst[0].Register.Index = ctx->fog_factor_temp; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + 
inst.Instruction.NumSrcRegs = 2; + SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W); + SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, W, W, W, W); + tctx->emit_instruction(tctx, &inst); + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + inst.Dst[0].Register.Index = ctx->fog_factor_temp; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + inst.Instruction.NumSrcRegs = 2; + SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W); + SET_SRC(&inst, 1, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W); + tctx->emit_instruction(tctx, &inst); + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_EX2; + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + inst.Dst[0].Register.Index = ctx->fog_factor_temp; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + inst.Instruction.NumSrcRegs = 1; + SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W); + inst.Src[0].Register.Negate ^= 1; + tctx->emit_instruction(tctx, &inst); + } + /* f = CLAMP(f, 0.0, 1.0) */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_CLAMP; + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + inst.Dst[0].Register.Index = ctx->fog_factor_temp; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + inst.Instruction.NumSrcRegs = 3; + SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W); + SET_SRC(&inst, 1, TGSI_FILE_IMMEDIATE, ctx->fog_clamp_imm, Y, Y, Y, Y); // 0.0 + SET_SRC(&inst, 2, TGSI_FILE_IMMEDIATE, ctx->fog_clamp_imm, X, X, X, X); // 1.0 + tctx->emit_instruction(tctx, &inst); + + /* REG0 = LRP(f, REG0, fogcolor) */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_LRP; + inst.Instruction.NumDstRegs = 1; + 
inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + inst.Dst[0].Register.Index = 0; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + inst.Instruction.NumSrcRegs = 3; + SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, X, X, Y); + SET_SRC(&inst, 1, TGSI_FILE_TEMPORARY, 0, X, Y, Z, W); + SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI + 1, X, Y, Z, W); /* the fog color: second state reference added by st_init_atifs_prog() */ + tctx->emit_instruction(tctx, &inst); + } + + tctx->emit_instruction(tctx, current_inst); +} + +/* + * A post-process step in the draw call to fix texture targets and + * insert code for fog. + * + * Returns the transformed token array, or NULL when it cannot be allocated. + * The output buffer is sized as the input length plus 30 tokens and the + * return value of tgsi_transform_shader() is not checked. + */ +const struct tgsi_token * +st_fixup_atifs(const struct tgsi_token *tokens, + const struct st_fp_variant_key *key) +{ + struct tgsi_atifs_transform ctx; + struct tgsi_token *newtoks; + int newlen; + + memset(&ctx, 0, sizeof(ctx)); + ctx.base.transform_declaration = transform_decl; + ctx.base.transform_instruction = transform_instr; + ctx.key = key; + tgsi_scan_shader(tokens, &ctx.info); + + newlen = tgsi_num_tokens(tokens) + 30; + newtoks = tgsi_alloc_tokens(newlen); + if (!newtoks) + return NULL; + + tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); + return newtoks; +} + diff --git a/src/mesa/state_tracker/st_atifs_to_tgsi.h b/src/mesa/state_tracker/st_atifs_to_tgsi.h new file mode 100644 index 00000000000..c1b6758ba02 --- /dev/null +++ b/src/mesa/state_tracker/st_atifs_to_tgsi.h @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2016 Miklós Máté + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * 
all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef ST_ATIFS_TO_TGSI_H +#define ST_ATIFS_TO_TGSI_H + +#if defined __cplusplus +extern "C" { +#endif + +#include "main/glheader.h" +#include "pipe/p_defines.h" + +struct gl_context; +struct gl_program; +struct ureg_program; +struct tgsi_token; +struct ati_fragment_shader; +struct st_fp_variant_key; + +enum pipe_error +st_translate_atifs_program( + struct ureg_program *ureg, + struct ati_fragment_shader *atifs, + struct gl_program *program, + GLuint numInputs, + const GLuint inputMapping[], + const ubyte inputSemanticName[], + const ubyte inputSemanticIndex[], + const GLuint interpMode[], + GLuint numOutputs, + const GLuint outputMapping[], + const ubyte outputSemanticName[], + const ubyte outputSemanticIndex[]); + + +void +st_init_atifs_prog(struct gl_context *ctx, struct gl_program *prog); + +const struct tgsi_token * +st_fixup_atifs(const struct tgsi_token *tokens, + const struct st_fp_variant_key *key); + +#if defined __cplusplus +} /* extern "C" */ +#endif + +#endif /* ST_ATIFS_TO_TGSI_H */ diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index 407dfd31c80..4d9b344111b 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -64,6 +64,21 @@ void st_upload_constants( struct st_context *st, shader_type == PIPE_SHADER_TESS_EVAL || shader_type == PIPE_SHADER_COMPUTE); + /* update the ATI constants before rendering */ + 
struct ati_fragment_shader *ati_fs = st->fp->ati_fs; + if (shader_type == PIPE_SHADER_FRAGMENT && ati_fs) { + unsigned c; + + for (c = 0; c < MAX_NUM_FRAGMENT_CONSTANTS_ATI; c++) { + if (ati_fs->LocalConstDef & (1 << c)) + memcpy(params->ParameterValues[c], + ati_fs->Constants[c], sizeof(GLfloat) * 4); + else + memcpy(params->ParameterValues[c], + st->ctx->ATIFragmentShader.GlobalConstants[c], sizeof(GLfloat) * 4); + } + } + /* update constants */ if (params && params->NumParameters) { struct pipe_constant_buffer cb; diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 709f0cbcb91..d0c2429dcef 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -38,18 +38,69 @@ #include "main/imports.h" #include "main/mtypes.h" #include "main/framebuffer.h" +#include "main/texobj.h" +#include "main/texstate.h" #include "program/program.h" #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" #include "util/u_simple_shaders.h" #include "cso_cache/cso_context.h" +#include "util/u_debug.h" #include "st_context.h" #include "st_atom.h" #include "st_program.h" +/** Compress the fog function enums into a 2-bit value */ +static GLuint +translate_fog_mode(GLenum mode) +{ + switch (mode) { + case GL_LINEAR: return 1; + case GL_EXP: return 2; + case GL_EXP2: return 3; + default: + return 0; + } +} + +static unsigned +get_texture_target(struct gl_context *ctx, const unsigned unit) +{ + struct gl_texture_object *texObj = _mesa_get_tex_unit(ctx, unit)->_Current; + gl_texture_index index; + + if (texObj) { + index = _mesa_tex_target_to_index(ctx, texObj->Target); + } else { + /* fallback for missing texture */ + index = TEXTURE_2D_INDEX; + } + + /* Map mesa texture target to TGSI texture target. 
+ * Copied from st_mesa_to_tgsi.c, the shadow part is omitted */ + switch(index) { + case TEXTURE_2D_MULTISAMPLE_INDEX: return TGSI_TEXTURE_2D_MSAA; + case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX: return TGSI_TEXTURE_2D_ARRAY_MSAA; + case TEXTURE_BUFFER_INDEX: return TGSI_TEXTURE_BUFFER; + case TEXTURE_1D_INDEX: return TGSI_TEXTURE_1D; + case TEXTURE_2D_INDEX: return TGSI_TEXTURE_2D; + case TEXTURE_3D_INDEX: return TGSI_TEXTURE_3D; + case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE; + case TEXTURE_CUBE_ARRAY_INDEX: return TGSI_TEXTURE_CUBE_ARRAY; + case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT; + case TEXTURE_1D_ARRAY_INDEX: return TGSI_TEXTURE_1D_ARRAY; + case TEXTURE_2D_ARRAY_INDEX: return TGSI_TEXTURE_2D_ARRAY; + case TEXTURE_EXTERNAL_INDEX: return TGSI_TEXTURE_2D; + default: + debug_assert(0); + return TGSI_TEXTURE_1D; + } +} + + /** * Update fragment program state/atom. This involves translating the * Mesa fragment program into a gallium fragment program and binding it. @@ -79,6 +130,18 @@ update_fp( struct st_context *st ) st->ctx->Multisample.MinSampleShadingValue * _mesa_geometric_samples(st->ctx->DrawBuffer) > 1; + if (stfp->ati_fs) { + unsigned u; + + if (st->ctx->Fog.Enabled) { + key.fog = translate_fog_mode(st->ctx->Fog.Mode); + } + + for (u = 0; u < MAX_NUM_FRAGMENT_REGISTERS_ATI; u++) { + key.texture_targets[u] = get_texture_target(st->ctx, u); + } + } + st->fp_variant = st_get_fp_variant(st, stfp, &key); st_reference_fragprog(st, &st->fp, stfp); @@ -91,7 +154,7 @@ update_fp( struct st_context *st ) const struct st_tracked_state st_update_fp = { "st_update_fp", /* name */ { /* dirty */ - _NEW_BUFFERS | _NEW_MULTISAMPLE, /* mesa */ + _NEW_BUFFERS | _NEW_MULTISAMPLE | _NEW_FOG, /* mesa */ ST_NEW_FRAGMENT_PROGRAM /* st */ }, update_fp /* update */ diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 09f4d8e00d1..01ed5441d11 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ 
b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -1302,6 +1302,7 @@ blit_copy_pixels(struct gl_context *ctx, GLint srcx, GLint srcy, !ctx->FragmentProgram.Enabled && !ctx->VertexProgram.Enabled && !ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT] && + !ctx->ATIFragmentShader._Enabled && ctx->DrawBuffer->_NumColorDrawBuffers == 1 && !ctx->Query.CondRenderQuery && !ctx->Query.CurrentOcclusionObject) { diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index 27cc0f3d154..d79cfe239e4 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -46,6 +46,7 @@ #include "st_mesa_to_tgsi.h" #include "st_cb_program.h" #include "st_glsl_to_tgsi.h" +#include "st_atifs_to_tgsi.h" @@ -302,6 +303,22 @@ st_program_string_notify( struct gl_context *ctx, if (st->cp == stcp) st->dirty_cp.st |= ST_NEW_COMPUTE_PROGRAM; } + else if (target == GL_FRAGMENT_SHADER_ATI) { + assert(prog); + + struct st_fragment_program *stfp = (struct st_fragment_program *) prog; + assert(stfp->ati_fs); + assert(stfp->ati_fs->Program == prog); + + st_init_atifs_prog(ctx, prog); + + st_release_fp_variants(st, stfp); + if (!st_translate_fragment_program(st, stfp)) + return false; + + if (st->fp == stfp) + st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM; + } if (ST_DEBUG & DEBUG_PRECOMPILE || st->shader_has_one_variant[stage]) @@ -310,6 +327,19 @@ st_program_string_notify( struct gl_context *ctx, return GL_TRUE; } +/** + * Called via ctx->Driver.NewATIfs() + * Called in glEndFragmentShaderATI() + */ +static struct gl_program * +st_new_ati_fs(struct gl_context *ctx, struct ati_fragment_shader *curProg) +{ + struct gl_program *prog = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, + curProg->Id); + struct st_fragment_program *stfp = (struct st_fragment_program *)prog; + stfp->ati_fs = curProg; + return prog; +} /** * Plug in the program and shader-related device driver functions. 
@@ -322,6 +352,7 @@ st_init_program_functions(struct dd_function_table *functions) functions->NewProgram = st_new_program; functions->DeleteProgram = st_delete_program; functions->ProgramStringNotify = st_program_string_notify; + functions->NewATIfs = st_new_ati_fs; functions->LinkShader = st_link_shader; } diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 80dcfd82743..94dc48971ec 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -53,6 +53,7 @@ #include "st_context.h" #include "st_program.h" #include "st_mesa_to_tgsi.h" +#include "st_atifs_to_tgsi.h" #include "cso_cache/cso_context.h" @@ -811,7 +812,22 @@ st_translate_fragment_program(struct st_context *st, free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi); stfp->glsl_to_tgsi = NULL; - } else + } else if (stfp->ati_fs) + st_translate_atifs_program(ureg, + stfp->ati_fs, + &stfp->Base.Base, + /* inputs */ + fs_num_inputs, + inputMapping, + input_semantic_name, + input_semantic_index, + interpMode, + /* outputs */ + fs_num_outputs, + outputMapping, + fs_output_semantic_name, + fs_output_semantic_index); + else st_translate_mesa_program(st->ctx, TGSI_PROCESSOR_FRAGMENT, ureg, @@ -849,6 +865,16 @@ st_create_fp_variant(struct st_context *st, assert(!(key->bitmap && key->drawpixels)); + /* Fix texture targets and add fog for ATI_fs */ + if (stfp->ati_fs) { + const struct tgsi_token *tokens = st_fixup_atifs(tgsi.tokens, key); + + if (tokens) + tgsi.tokens = tokens; + else + fprintf(stderr, "mesa: cannot post-process ATI_fs\n"); + } + /* Emulate features. 
*/ if (key->clamp_color || key->persample_shading) { const struct tgsi_token *tokens; @@ -858,9 +884,11 @@ st_create_fp_variant(struct st_context *st, tokens = tgsi_emulate(tgsi.tokens, flags); - if (tokens) + if (tokens) { + if (tgsi.tokens != stfp->tgsi.tokens) + tgsi_free_tokens(tgsi.tokens); tgsi.tokens = tokens; - else + } else fprintf(stderr, "mesa: cannot emulate deprecated features\n"); } diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index 028fba99a74..7c90fd74e14 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -35,6 +35,7 @@ #define ST_PROGRAM_H #include "main/mtypes.h" +#include "main/atifragshader.h" #include "program/program.h" #include "pipe/p_state.h" #include "st_context.h" @@ -65,6 +66,12 @@ struct st_fp_variant_key /** for ARB_sample_shading */ GLuint persample_shading:1; + + /** needed for ATI_fragment_shader */ + GLuint fog:2; + + /** needed for ATI_fragment_shader */ + char texture_targets[MAX_NUM_FRAGMENT_REGISTERS_ATI]; }; @@ -99,6 +106,7 @@ struct st_fragment_program struct gl_fragment_program Base; struct pipe_shader_state tgsi; struct glsl_to_tgsi_visitor* glsl_to_tgsi; + struct ati_fragment_shader *ati_fs; struct st_fp_variant *variants; }; -- cgit v1.2.3 From 920fbecf57bc8e81db029a52f6ef1c9344d8ddab Mon Sep 17 00:00:00 2001 From: Miklós Máté Date: Thu, 24 Mar 2016 01:12:58 +0100 Subject: st/mesa: enable GL_ATI_fragment_shader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miklós Máté Signed-off-by: Marek Olšák --- docs/relnotes/11.3.0.html | 1 + src/mesa/state_tracker/st_extensions.c | 1 + 2 files changed, 2 insertions(+) (limited to 'src/mesa') diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html index acd8e11e3fc..508fbd34901 100644 --- a/docs/relnotes/11.3.0.html +++ b/docs/relnotes/11.3.0.html @@ -48,6 +48,7 @@ Note: some of the new features are only available with certain 
drivers.
  • GL_ARB_shader_atomic_counter_ops on nvc0
  • GL_ARB_shader_image_load_store on radeonsi
  • GL_ARB_shader_image_size on radeonsi
  • +
  • GL_ATI_fragment_shader on all Gallium drivers
  • GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp
  • GL_OES_shader_image_atomic on all drivers that support GL_ARB_shader_image_load_store
  • diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 0d6c6b196a1..44d93e30b4d 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -810,6 +810,7 @@ void st_init_extensions(struct pipe_screen *screen, extensions->EXT_texture_env_dot3 = GL_TRUE; extensions->EXT_vertex_array_bgra = GL_TRUE; + extensions->ATI_fragment_shader = GL_TRUE; extensions->ATI_texture_env_combine3 = GL_TRUE; extensions->MESA_pack_invert = GL_TRUE; -- cgit v1.2.3 From baab345b192d207236253ce67b320fb32fa67625 Mon Sep 17 00:00:00 2001 From: Miklós Máté Date: Thu, 24 Mar 2016 01:13:00 +0100 Subject: st/mesa: fix handling the fallback texture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes crash when post-processing is enabled in SW:KotOR. v2: fix const-ness v3: move assignment into the if() block Signed-off-by: Miklós Máté Signed-off-by: Marek Olšák --- src/mesa/state_tracker/st_atom_sampler.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index 82dcf5ee0ca..a1cfa1c34c5 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -133,18 +133,19 @@ convert_sampler(struct st_context *st, { const struct gl_texture_object *texobj; struct gl_context *ctx = st->ctx; - struct gl_sampler_object *msamp; + const struct gl_sampler_object *msamp; GLenum texBaseFormat; texobj = ctx->Texture.Unit[texUnit]._Current; if (!texobj) { texobj = _mesa_get_fallback_texture(ctx, TEXTURE_2D_INDEX); + msamp = &texobj->Sampler; + } else { + msamp = _mesa_get_samplerobj(ctx, texUnit); } texBaseFormat = _mesa_texture_base_format(texobj); - msamp = _mesa_get_samplerobj(ctx, texUnit); - memset(sampler, 0, sizeof(*sampler)); sampler->wrap_s = gl_wrap_xlate(msamp->WrapS); sampler->wrap_t = 
gl_wrap_xlate(msamp->WrapT); -- cgit v1.2.3 From 50d653c2bbe7e6a7c0893b13ad6e57b68f55f8e4 Mon Sep 17 00:00:00 2001 From: Miklós Máté Date: Thu, 24 Mar 2016 01:13:02 +0100 Subject: mesa: optimize out the realloc from glCopyTexImagexD() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: comment about the purpose of the code v3: also compare texFormat, add a perf debug message, formatting fixes Reviewed-by: Ian Romanick Signed-off-by: Miklós Máté Signed-off-by: Marek Olšák --- src/mesa/main/teximage.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 616a92953e7..5af0a2ec198 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -3484,6 +3484,24 @@ formats_differ_in_component_sizes(mesa_format f1, mesa_format f2) return GL_FALSE; } +static bool +can_avoid_reallocation(struct gl_texture_image *texImage, GLenum internalFormat, + mesa_format texFormat, GLint x, GLint y, GLsizei width, + GLsizei height, GLint border) +{ + if (texImage->InternalFormat != internalFormat) + return false; + if (texImage->TexFormat != texFormat) + return false; + if (texImage->Border != border) + return false; + if (texImage->Width2 != width) + return false; + if (texImage->Height2 != height) + return false; + return true; +} + /** * Implement the glCopyTexImage1/2D() functions. */ @@ -3527,6 +3545,24 @@ copyteximage(struct gl_context *ctx, GLuint dims, texFormat = _mesa_choose_texture_format(ctx, texObj, target, level, internalFormat, GL_NONE, GL_NONE); + /* First check if reallocating the texture buffer can be avoided. + * Without the realloc the copy can be 20x faster. 
+ */ + _mesa_lock_texture(ctx, texObj); + { + texImage = _mesa_select_tex_image(texObj, target, level); + if (texImage && can_avoid_reallocation(texImage, internalFormat, texFormat, + x, y, width, height, border)) { + _mesa_unlock_texture(ctx, texObj); + return _mesa_copy_texture_sub_image(ctx, dims, texObj, target, level, + 0, 0, 0, x, y, width, height, + "CopyTexImage"); + } + } + _mesa_unlock_texture(ctx, texObj); + _mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_LOW, "glCopyTexImage " + "can't avoid reallocating texture storage\n"); + rb = _mesa_get_read_renderbuffer_for_format(ctx, internalFormat); if (_mesa_is_gles3(ctx)) { -- cgit v1.2.3 From 21c479256a4f195bce89bc313ab9367deef20bb1 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 21 Mar 2016 12:18:40 +0100 Subject: st/mesa: only minify height if target != 1D array in st_finalize_texture The st_texture_object documentation says: "the number of 1D array layers will be in height0" We can't minify that. Spotted by luck. No app is known to hit this issue. Reviewed-by: Ilia Mirkin --- src/mesa/state_tracker/st_cb_texture.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 460c1790663..3980f5d2f51 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -2886,12 +2886,17 @@ st_finalize_texture(struct gl_context *ctx, /* Need to import images in main memory or held in other textures. 
*/ if (stImage && stObj->pt != stImage->pt) { + GLuint height = stObj->height0; GLuint depth = stObj->depth0; + + if (stObj->base.Target != GL_TEXTURE_1D_ARRAY) + height = u_minify(height, level); if (stObj->base.Target == GL_TEXTURE_3D) depth = u_minify(depth, level); + if (level == 0 || (stImage->base.Width == u_minify(stObj->width0, level) && - stImage->base.Height == u_minify(stObj->height0, level) && + stImage->base.Height == height && stImage->base.Depth == depth)) { /* src image fits expected dest mipmap level size */ copy_image_data_to_texture(st, stObj, level, stImage); -- cgit v1.2.3 From dd5f0950e4105b022d6c909e0a39fe38426312c4 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 28 Mar 2016 17:01:49 +0200 Subject: mesa/st: Fix NULL access if no fragment shader is bound MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Marek Olšák --- src/mesa/state_tracker/st_atom_constbuf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index 4d9b344111b..a980dbedac5 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -65,8 +65,8 @@ void st_upload_constants( struct st_context *st, shader_type == PIPE_SHADER_COMPUTE); /* update the ATI constants before rendering */ - struct ati_fragment_shader *ati_fs = st->fp->ati_fs; - if (shader_type == PIPE_SHADER_FRAGMENT && ati_fs) { + if (shader_type == PIPE_SHADER_FRAGMENT && st->fp->ati_fs) { + struct ati_fragment_shader *ati_fs = st->fp->ati_fs; unsigned c; for (c = 0; c < MAX_NUM_FRAGMENT_CONSTANTS_ATI; c++) { -- cgit v1.2.3 From de505f7d7bfff3fe031242589c8986ded201c837 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 24 Mar 2016 16:21:35 -0700 Subject: i965: Whack UAV bit when FS discards and there are no color writes. 
dEQP-GLES31.functional.fbo.no_attachments.* draws a quad with no framebuffer attachments, using a shader that discards based on gl_FragCoord. It uses occlusion queries to inspect whether pixels are rendered or not. Unfortunately, the hardware is not dispatching any pixel shaders, so discards never happen, and the full quad of pixels increments PS_DEPTH_COUNT, making the occlusion query results bogus. To understand why, we have to delve into the WM_INT internal signalling mechanism's formulas. The "WM_INT::Pixel Shader Kill Pixel" signal is defined as: 3DSTATE_WM::ForceKillPixel == ON || (3DSTATE_WM::ForceKillPixel != Off && !WM_INT::WM_HZ_OP && 3DSTATE_WM::EDSC_Mode != PREPS && (WM_INT::Depth Write Enable || WM_INT::Stencil Write Enable) && ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (3DSTATE_PS_EXTRA::PixelShaderKillsPixels || 3DSTATE_PS_EXTRA:: oMask Present to RenderTarget || 3DSTATE_PS_BLEND::AlphaToCoverageEnable || 3DSTATE_PS_BLEND::AlphaTestEnable || 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)) Because there is no depth or stencil buffer, writes to those buffers are disabled. So the highlighted condition is false, making the whole "Kill Pixel" condition false. 
This then feeds into the following "WM_INT::ThreadDispatchEnable" condition: 3DSTATE_WM::ForceThreadDispatch != OFF && !WM_INT::WM_HZ_OP && 3DSTATE_PS_EXTRA::PixelShaderValid && (3DSTATE_PS_EXTRA::PixelShaderHasUAV || WM_INT::Pixel Shader Kill Pixel || WM_INT::RTIndependentRasterizationEnable || (!3DSTATE_PS_EXTRA::PixelShaderDoesNotWriteRT && 3DSTATE_PS_BLEND::HasWriteableRT) || (WM_INT::Pixel Shader Computed Depth Mode != PSCDEPTH_OFF && (WM_INT::Depth Test Enable || WM_INT::Depth Write Enable)) || (3DSTATE_PS_EXTRA::Computed Stencil && WM_INT::Stencil Test Enable) || (3DSTATE_WM::EDSC_Mode == 1 && (WM_INT::Depth Test Enable || WM_INT::Depth Write Enable || WM_INT::Stencil Test Enable))) Given that there's no depth/stencil testing, no writeable render target, and the hardware thinks kill pixel doesn't happen, all of these conditions are false. We have to whack some bit to make PS invocations happen. There are many options. Curro suggested using the UAV bit. There's some precedent for doing that - we set it for fragment shaders that do SSBO/image/atomic writes when no color buffer writes are enabled. We can simply include discard here too. Fixes 64 dEQP-GLES31.functional.fbo.no_attachments.* tests. v2: Add a comment suggested and written by Jason Ekstrand. Signed-off-by: Kenneth Graunke Reviewed-by: Francisco Jerez Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/gen8_ps_state.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index b9a06e7b2c7..7dfd4bfb8de 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -91,10 +91,15 @@ gen8_upload_ps_extra(struct brw_context *brw, * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any * difference so we may just disable it here. 
* + * Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't + * take into account KillPixels when no depth or stencil writes are enabled. + * In order for occlusion queries to work correctly with no attachments, we + * need to force-enable here. + * * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR */ - if (_mesa_active_fragment_shader_has_side_effects(&brw->ctx) && - !brw_color_buffer_write_enabled(brw)) + if ((_mesa_active_fragment_shader_has_side_effects(ctx) || + prog_data->uses_kill) && !brw_color_buffer_write_enabled(brw)) dw1 |= GEN8_PSX_SHADER_HAS_UAV; if (prog_data->computed_stencil) { -- cgit v1.2.3 From 72473658c51d5e074ce219c1e6385a4cce29f467 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 25 Mar 2016 15:33:35 -0700 Subject: i965: Fix brw_render_cache_set_check_flush's PIPE_CONTROLs. Our driver uses the brw_render_cache mechanism to track buffers we've rendered to and are about to sample from. Previously, we did a single PIPE_CONTROL with the following bits set: - Render Target Flush - Depth Cache Flush - Texture Cache Invalidate - VF Cache Invalidate - Instruction Cache Invalidate - CS Stall This combined both "top of pipe" invalidations and "bottom of pipe" flushes, which isn't how the hardware is intended to be programmed. The "top of pipe" invalidations may happen right away, without any guarantees that rendering using those caches has completed. That rendering may continue altering the caches. The "bottom of pipe" flushes do wait for the rendering to complete. The CS stall also prevents further work from happening until data is flushed out. What we wanted to do was wait for rendering complete, flush the new data out of the render and depth caches, wait, then invalidate any stale data in read-only caches. We can accomplish this by doing the "bottom of pipe" flushes with a CS stall, then the "top of pipe" flushes as a second PIPE_CONTROL. 
The flushes will wait until the rendering is complete, and the CS stall will prevent the second PIPE_CONTROL with the invalidations from executing until the first is done. Fixes dEQP-GLES3.functional.texture.specification.teximage2d_pbo subtests on Braswell and Skylake. These tests hit the meta PBO texture upload path, which binds the PBO as a texture and samples from it, while rendering to the destination texture. The tests then sample from the texture. For now, we leave Gen4-5 alone. It probably needs work too, but apparently it hasn't even been setting the (G45+) TC invalidation bit at all... v2: Add Sandybridge post-sync non-zero workaround, for safety. Cc: mesa-stable@lists.freedesktop.org Suggested-by: Francisco Jerez Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_pipe_control.c | 2 -- src/mesa/drivers/dri/i965/intel_fbo.c | 23 ++++++++++++++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c index b41e28e1ec8..4672efdffc3 100644 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c @@ -338,8 +338,6 @@ brw_emit_mi_flush(struct brw_context *brw) } brw_emit_pipe_control_flush(brw, flags); } - - brw_render_cache_set_clear(brw); } int diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index b7b679686e5..7eb21acc40b 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -1065,7 +1065,28 @@ brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo) if (!_mesa_set_search(brw->render_cache, bo)) return; - brw_emit_mi_flush(brw); + if (brw->gen >= 6) { + if (brw->gen == 6) { + /* [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache + * Flush Enable = 1, a PIPE_CONTROL with any non-zero + * post-sync-op is 
required. + */ + brw_emit_post_sync_nonzero_flush(brw); + } + + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_CS_STALL); + + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | + PIPE_CONTROL_CONST_CACHE_INVALIDATE); + } else { + brw_emit_mi_flush(brw); + } + + brw_render_cache_set_clear(brw); } /** -- cgit v1.2.3 From 0faf26e6a0a34c3544644852802484f2404cc83e Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 23 Mar 2016 11:56:39 -0700 Subject: i965: Always use BRW_TEXCOORDMODE_CUBE when seamless filtering. When using seamless cube map mode and NEAREST filtering, we explicitly overrode the wrap modes to CLAMP_TO_EDGE. This was to implement the following spec text: "If NEAREST filtering is done within a miplevel, always apply wrap mode CLAMP_TO_EDGE." However, textureGather() ignores the sampler's filtering mode, and instead returns the four pixels that would be blended by LINEAR filtering. This implies that we should do proper seamless filtering, and include pixels from adjacent cube faces. It turns out that we can simply delete the NEAREST -> CLAMP_TO_EDGE overrides. Normal cube map sampling works by first selecting the face, and then nearest filtering fetches the closest texel. If the nearest texel was on a different face, then that face would have been chosen. So it should always be within the face anyway, which effectively performs CLAMP_TO_EDGE. Fixes 86 dEQP-GLES31.texture.gather.basic.cube.* tests. 
Signed-off-by: Kenneth Graunke Suggested-by: Ian Romanick Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_sampler_state.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c index c20a02817f9..3bd22c7559f 100644 --- a/src/mesa/drivers/dri/i965/brw_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c @@ -460,9 +460,7 @@ brw_update_sampler_state(struct brw_context *brw, /* Cube maps must use the same wrap mode for all three coordinate * dimensions. Prior to Haswell, only CUBE and CLAMP are valid. */ - if ((tex_cube_map_seamless || sampler->CubeMapSeamless) && - (sampler->MinFilter != GL_NEAREST || - sampler->MagFilter != GL_NEAREST)) { + if (tex_cube_map_seamless || sampler->CubeMapSeamless) { wrap_s = BRW_TEXCOORDMODE_CUBE; wrap_t = BRW_TEXCOORDMODE_CUBE; wrap_r = BRW_TEXCOORDMODE_CUBE; -- cgit v1.2.3 From 60d6a8989ab44cf47accee6bc692ba6fb98f6a9f Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 7 Mar 2016 23:54:53 -0800 Subject: i965: Set address rounding bits for GL_NEAREST filtering as well. Yuanhan Liu decided these were useful for linear filtering in commit 76669381 (circa 2011). Prior to that, we never set them; it seems he tried to preserve that behavior for nearest filtering. 
It turns out they're useful for nearest filtering, too: setting these fixes the following dEQP-GLES3 tests: functional.fbo.blit.rect.nearest_consistency_mag functional.fbo.blit.rect.nearest_consistency_mag_reverse_src_x functional.fbo.blit.rect.nearest_consistency_mag_reverse_src_y functional.fbo.blit.rect.nearest_consistency_mag_reverse_dst_x functional.fbo.blit.rect.nearest_consistency_mag_reverse_dst_y functional.fbo.blit.rect.nearest_consistency_mag_reverse_src_dst_x functional.fbo.blit.rect.nearest_consistency_mag_reverse_src_dst_y functional.fbo.blit.rect.nearest_consistency_min functional.fbo.blit.rect.nearest_consistency_min_reverse_src_x functional.fbo.blit.rect.nearest_consistency_min_reverse_src_y functional.fbo.blit.rect.nearest_consistency_min_reverse_dst_x functional.fbo.blit.rect.nearest_consistency_min_reverse_dst_y functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_x functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_y functional.fbo.blit.rect.nearest_consistency_out_of_bounds_mag functional.fbo.blit.rect.nearest_consistency_out_of_bounds_mag_reverse_src_x functional.fbo.blit.rect.nearest_consistency_out_of_bounds_mag_reverse_src_y functional.fbo.blit.rect.nearest_consistency_out_of_bounds_mag_reverse_dst_x functional.fbo.blit.rect.nearest_consistency_out_of_bounds_mag_reverse_dst_y functional.fbo.blit.rect.nearest_consistency_out_of_bounds_mag_reverse_src_dst_x functional.fbo.blit.rect.nearest_consistency_out_of_bounds_mag_reverse_src_dst_y functional.fbo.blit.rect.nearest_consistency_out_of_bounds_min functional.fbo.blit.rect.nearest_consistency_out_of_bounds_min_reverse_src_x functional.fbo.blit.rect.nearest_consistency_out_of_bounds_min_reverse_src_y functional.fbo.blit.rect.nearest_consistency_out_of_bounds_min_reverse_dst_x functional.fbo.blit.rect.nearest_consistency_out_of_bounds_min_reverse_dst_y functional.fbo.blit.rect.nearest_consistency_out_of_bounds_min_reverse_src_dst_x 
functional.fbo.blit.rect.nearest_consistency_out_of_bounds_min_reverse_src_dst_y Apparently, BLORP has always set these bits unconditionally. However, setting them unconditionally appears to regress tests using texture projection, 3D samplers, integer formats, and vertex shaders, all in combination, such as: functional.shaders.texture_functions.textureprojlod.isampler3d_vertex Setting them on Gen4-5 appears to regress Piglit's tests/spec/arb_sampler_objects/framebufferblit. Honestly, it looks like the real problem here is a lack of precision. I'm just hacking around problems here (as embarrassing as it is). Signed-off-by: Kenneth Graunke Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_sampler_state.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c index 3bd22c7559f..7bd21f7aaf0 100644 --- a/src/mesa/drivers/dri/i965/brw_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c @@ -436,14 +436,17 @@ brw_update_sampler_state(struct brw_context *brw, } } - /* Set address rounding bits if not using nearest filtering. */ + /* Set address rounding bits. The conditions are empirically + * derived in order to pass test cases. 
+ */ + bool round_nearest = brw->gen >= 6 && target != GL_TEXTURE_3D; unsigned address_rounding = 0; - if (min_filter != BRW_MAPFILTER_NEAREST) { + if (min_filter != BRW_MAPFILTER_NEAREST || round_nearest) { address_rounding |= BRW_ADDRESS_ROUNDING_ENABLE_U_MIN | BRW_ADDRESS_ROUNDING_ENABLE_V_MIN | BRW_ADDRESS_ROUNDING_ENABLE_R_MIN; } - if (mag_filter != BRW_MAPFILTER_NEAREST) { + if (mag_filter != BRW_MAPFILTER_NEAREST || round_nearest) { address_rounding |= BRW_ADDRESS_ROUNDING_ENABLE_U_MAG | BRW_ADDRESS_ROUNDING_ENABLE_V_MAG | BRW_ADDRESS_ROUNDING_ENABLE_R_MAG; -- cgit v1.2.3 From 86d87d10474d1c5c5683acb28d4491e877432a90 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Sun, 27 Mar 2016 14:51:02 +1100 Subject: mesa: remove initialized field from uniform storage The only place this was used was in a gallium debug function that had to be manually enabled. Reviewed-by: Edward O'Callaghan Reviewed-by: Brian Paul --- src/compiler/glsl/ir_uniform.h | 5 ---- src/compiler/glsl/link_uniform_initializers.cpp | 4 --- src/compiler/glsl/link_uniforms.cpp | 1 - src/mesa/main/shaderapi.c | 3 +- src/mesa/main/uniform_query.cpp | 4 --- src/mesa/state_tracker/st_draw.c | 37 ------------------------- 6 files changed, 1 insertion(+), 53 deletions(-) (limited to 'src/mesa') diff --git a/src/compiler/glsl/ir_uniform.h b/src/compiler/glsl/ir_uniform.h index 1854279925b..e72e7b42c57 100644 --- a/src/compiler/glsl/ir_uniform.h +++ b/src/compiler/glsl/ir_uniform.h @@ -105,11 +105,6 @@ struct gl_uniform_storage { */ unsigned array_elements; - /** - * Has this uniform ever been set? 
- */ - bool initialized; - struct gl_opaque_uniform_index opaque[MESA_SHADER_STAGES]; /** diff --git a/src/compiler/glsl/link_uniform_initializers.cpp b/src/compiler/glsl/link_uniform_initializers.cpp index 7d280ccf7fc..870bc5bfebd 100644 --- a/src/compiler/glsl/link_uniform_initializers.cpp +++ b/src/compiler/glsl/link_uniform_initializers.cpp @@ -162,8 +162,6 @@ set_opaque_binding(void *mem_ctx, gl_shader_program *prog, } } } - - storage->initialized = true; } } @@ -267,8 +265,6 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog, } } } - - storage->initialized = true; } } diff --git a/src/compiler/glsl/link_uniforms.cpp b/src/compiler/glsl/link_uniforms.cpp index 807b069e3ed..cd487ab6dd0 100644 --- a/src/compiler/glsl/link_uniforms.cpp +++ b/src/compiler/glsl/link_uniforms.cpp @@ -799,7 +799,6 @@ private: this->uniforms[id].name = ralloc_strdup(this->uniforms, name); this->uniforms[id].type = base_type; - this->uniforms[id].initialized = 0; this->uniforms[id].num_driver_storage = 0; this->uniforms[id].driver_storage = NULL; this->uniforms[id].atomic_buffer_index = -1; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 32fad56f651..ba2607221d9 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -2568,7 +2568,6 @@ _mesa_UniformSubroutinesuiv(GLenum shadertype, GLsizei count, memcpy(&uni->storage[0], &indices[i], sizeof(GLuint) * uni_count); - uni->initialized = true; _mesa_propagate_uniforms_to_driver_storage(uni, 0, uni_count); i += uni_count; } while(i < count); @@ -2742,7 +2741,7 @@ _mesa_shader_init_subroutine_defaults(struct gl_shader *sh) for (j = 0; j < uni_count; j++) memcpy(&uni->storage[j], &val, sizeof(int)); - uni->initialized = true; + _mesa_propagate_uniforms_to_driver_storage(uni, 0, uni_count); } } diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index 2ced201ebca..ab5c3cd9249 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ 
-815,8 +815,6 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg, } } - uni->initialized = true; - _mesa_propagate_uniforms_to_driver_storage(uni, offset, count); /* If the uniform is a sampler, do the extra magic necessary to propagate @@ -1030,8 +1028,6 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg, } } - uni->initialized = true; - _mesa_propagate_uniforms_to_driver_storage(uni, offset, count); } diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index fdd59a383a9..3db5749725e 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -126,35 +126,6 @@ setup_index_buffer(struct st_context *st, } -/** - * Prior to drawing, check that any uniforms referenced by the - * current shader have been set. If a uniform has not been set, - * issue a warning. - */ -static void -check_uniforms(struct gl_context *ctx) -{ - struct gl_shader_program **shProg = ctx->_Shader->CurrentProgram; - unsigned j; - - for (j = 0; j < 3; j++) { - unsigned i; - - if (shProg[j] == NULL || !shProg[j]->LinkStatus) - continue; - - for (i = 0; i < shProg[j]->NumUniformStorage; i++) { - const struct gl_uniform_storage *u = &shProg[j]->UniformStorage[i]; - if (!u->initialized) { - _mesa_warning(ctx, - "Using shader with uninitialized uniform: %s", - u->name); - } - } - } -} - - /** * Translate OpenGL primtive type (GL_POINTS, GL_TRIANGLE_STRIP, etc) to * the corresponding Gallium type. @@ -203,14 +174,6 @@ st_draw_vbo(struct gl_context *ctx, /* Validate state. 
*/ if (st->dirty.st || st->dirty.mesa || ctx->NewDriverState) { st_validate_state(st, ST_PIPELINE_RENDER); - -#if 0 - if (MESA_VERBOSE & VERBOSE_GLSL) { - check_uniforms(ctx); - } -#else - (void) check_uniforms; -#endif } if (st->vertex_array_out_of_memory) { -- cgit v1.2.3 From 659beca666c4e90ab5f366b231a94ed437898b80 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 27 Feb 2016 16:04:51 -0500 Subject: mesa: properly return GetTexLevelParameter queries for buffer textures This fixes all failures with dEQP tests in this area. While ARB_texture_buffer_object explicitly says that GetTexLevelParameter & co should not be supported, GL 3.1 reverses this decision and allows all of these queries there. Conversely, there is no text that forbids the buffer-specific queries from being used with non-buffer images. Signed-off-by: Ilia Mirkin Reviewed-by: Brian Paul --- src/mesa/main/texparam.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 9350ca5c035..8a3e02f0552 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1447,6 +1447,29 @@ get_tex_level_parameter_image(struct gl_context *ctx, *params = img->FixedSampleLocations; break; + /* There is never a buffer data store here, but these pnames still have + * to work. 
+ */ + + /* GL_ARB_texture_buffer_object */ + case GL_TEXTURE_BUFFER_DATA_STORE_BINDING: + if (!ctx->Extensions.ARB_texture_buffer_object) + goto invalid_pname; + *params = 0; + break; + + /* GL_ARB_texture_buffer_range */ + case GL_TEXTURE_BUFFER_OFFSET: + if (!ctx->Extensions.ARB_texture_buffer_range) + goto invalid_pname; + *params = 0; + break; + case GL_TEXTURE_BUFFER_SIZE: + if (!ctx->Extensions.ARB_texture_buffer_range) + goto invalid_pname; + *params = 0; + break; + default: goto invalid_pname; } @@ -1468,13 +1491,24 @@ get_tex_level_parameter_buffer(struct gl_context *ctx, { const struct gl_buffer_object *bo = texObj->BufferObject; mesa_format texFormat = texObj->_BufferObjectFormat; + int bytes = MAX2(1, _mesa_get_format_bytes(texFormat)); GLenum internalFormat = texObj->BufferObjectFormat; GLenum baseFormat = _mesa_get_format_base_format(texFormat); const char *suffix = dsa ? "ture" : ""; if (!bo) { /* undefined texture buffer object */ - *params = pname == GL_TEXTURE_COMPONENTS ? 1 : 0; + switch (pname) { + case GL_TEXTURE_FIXED_SAMPLE_LOCATIONS: + *params = GL_TRUE; + break; + case GL_TEXTURE_INTERNAL_FORMAT: + *params = internalFormat; + break; + default: + *params = 0; + break; + } return; } @@ -1483,10 +1517,13 @@ get_tex_level_parameter_buffer(struct gl_context *ctx, *params = bo->Name; break; case GL_TEXTURE_WIDTH: - *params = bo->Size; + *params = ((texObj->BufferSize == -1) ? bo->Size : texObj->BufferSize) + / bytes; break; case GL_TEXTURE_HEIGHT: case GL_TEXTURE_DEPTH: + *params = 1; + break; case GL_TEXTURE_BORDER: case GL_TEXTURE_SHARED_SIZE: case GL_TEXTURE_COMPRESSED: @@ -1536,6 +1573,19 @@ get_tex_level_parameter_buffer(struct gl_context *ctx, *params = (texObj->BufferSize == -1) ? 
bo->Size : texObj->BufferSize; break; + /* GL_ARB_texture_multisample */ + case GL_TEXTURE_SAMPLES: + if (!ctx->Extensions.ARB_texture_multisample) + goto invalid_pname; + *params = 0; + break; + + case GL_TEXTURE_FIXED_SAMPLE_LOCATIONS: + if (!ctx->Extensions.ARB_texture_multisample) + goto invalid_pname; + *params = GL_TRUE; + break; + /* GL_ARB_texture_compression */ case GL_TEXTURE_COMPRESSED_IMAGE_SIZE: /* Always illegal for GL_TEXTURE_BUFFER */ -- cgit v1.2.3 From 74b76c08a3732b0ca337998780d01d67e7fd554b Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 27 Feb 2016 16:06:42 -0500 Subject: mesa: add OES_texture_buffer and EXT_texture_buffer extension to table We need to add a new bit since the GL ES exts require functionality from a combination of texture buffer extensions as well as images (for imageBuffer) support. Additionally, not all GPUs support all the texture buffer functionality (e.g. rgb32 isn't supported by nv50). Signed-off-by: Ilia Mirkin Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/main/extensions_table.h | 2 ++ src/mesa/main/mtypes.h | 1 + 2 files changed, 3 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index 54a5bb057a3..7885aefcc28 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -228,6 +228,7 @@ EXT(EXT_texture , dummy_true EXT(EXT_texture3D , dummy_true , GLL, x , x , x , 1996) EXT(EXT_texture_array , EXT_texture_array , GLL, GLC, x , x , 2006) EXT(EXT_texture_border_clamp , ARB_texture_border_clamp , x , x , x , ES2, 2014) +EXT(EXT_texture_buffer , OES_texture_buffer , x , x , x , 31, 2014) EXT(EXT_texture_compression_dxt1 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2004) EXT(EXT_texture_compression_latc , EXT_texture_compression_latc , GLL, x , x , x , 2006) EXT(EXT_texture_compression_rgtc , ARB_texture_compression_rgtc , GLL, GLC, x , x , 2004) @@ -339,6 +340,7 @@ EXT(OES_stencil_wrap , 
dummy_true EXT(OES_surfaceless_context , dummy_true , x , x , ES1, ES2, 2012) EXT(OES_texture_3D , dummy_true , x , x , x , ES2, 2005) EXT(OES_texture_border_clamp , ARB_texture_border_clamp , x , x , x , ES2, 2014) +EXT(OES_texture_buffer , OES_texture_buffer , x , x , x , 31, 2014) EXT(OES_texture_cube_map , ARB_texture_cube_map , x , x , ES1, x , 2007) EXT(OES_texture_env_crossbar , ARB_texture_env_crossbar , x , x , ES1, x , 2005) EXT(OES_texture_float , OES_texture_float , x , x , x , ES2, 2005) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index c2c86a6e0d1..ae0c8a84dda 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3895,6 +3895,7 @@ struct gl_extensions GLboolean EXT_timer_query; GLboolean EXT_vertex_array_bgra; GLboolean OES_standard_derivatives; + GLboolean OES_texture_buffer; /* vendor extensions */ GLboolean AMD_performance_monitor; GLboolean AMD_pinned_memory; -- cgit v1.2.3 From b4c0c514b10ed85b50e4fc3bbd9c740db21e5720 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 27 Feb 2016 16:16:28 -0500 Subject: mesa: add OES_texture_buffer and EXT_texture_buffer support Allow ES 3.1 contexts to access the texture buffer functionality. 
Signed-off-by: Ilia Mirkin Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- docs/GL3.txt | 2 +- src/mapi/glapi/gen/apiexec.py | 4 ++-- src/mapi/glapi/gen/es_EXT.xml | 36 ++++++++++++++++++++++++++++++ src/mesa/main/bufferobj.c | 4 ++-- src/mesa/main/get.c | 4 ++-- src/mesa/main/get_hash_params.py | 20 ++++++++--------- src/mesa/main/tests/dispatch_sanity.cpp | 4 ++++ src/mesa/main/teximage.c | 20 +++++++++++------ src/mesa/main/texobj.c | 8 +++---- src/mesa/main/texparam.c | 39 +++++++++++++++++---------------- 10 files changed, 94 insertions(+), 47 deletions(-) (limited to 'src/mesa') diff --git a/docs/GL3.txt b/docs/GL3.txt index 88c14c4c67d..03ebf70fb0f 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -269,7 +269,7 @@ GLES3.2, GLSL ES 3.2 GL_OES_shader_multisample_interpolation not started (based on parts of GL_ARB_gpu_shader5, which is done) GL_OES_tessellation_shader not started (based on GL_ARB_tessellation_shader, which is done for some drivers) GL_OES_texture_border_clamp DONE (all drivers) - GL_OES_texture_buffer not started (based on GL_ARB_texture_buffer_object, GL_ARB_texture_buffer_range, and GL_ARB_texture_buffer_object_rgb32 that are all done) + GL_OES_texture_buffer DONE (core only) GL_OES_texture_cube_map_array not started (based on GL_ARB_texture_cube_map_array, which is done for all drivers) GL_OES_texture_stencil8 DONE (all drivers that support GL_ARB_texture_stencil8) GL_OES_texture_storage_multisample_2d_array DONE (all drivers that support GL_ARB_texture_multisample) diff --git a/src/mapi/glapi/gen/apiexec.py b/src/mapi/glapi/gen/apiexec.py index 2a8043264eb..b4f4cf6831b 100644 --- a/src/mapi/glapi/gen/apiexec.py +++ b/src/mapi/glapi/gen/apiexec.py @@ -68,7 +68,7 @@ class exec_info(): functions = { # OpenGL 3.1 / GL_ARB_texture_buffer_object. Mesa only exposes this # extension with core profile. - "TexBuffer": exec_info(core=31), + "TexBuffer": exec_info(core=31, es2=31), # OpenGL 3.2 / GL_OES_geometry_shader. 
"FramebufferTexture": exec_info(core=32, es2=31), @@ -146,7 +146,7 @@ functions = { # OpenGL 4.3 / GL_ARB_texture_buffer_range. Mesa can expose the extension # with OpenGL 3.1. - "TexBufferRange": exec_info(core=31), + "TexBufferRange": exec_info(core=31, es2=31), # OpenGL 4.3 / GL_ARB_framebuffer_no_attachments. Mesa can expose the # extension with OpenGL 3.0. diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index 178f7c027bc..8f8f997b20d 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -847,6 +847,24 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + API == API_OPENGL_CORE && - ctx->Extensions.ARB_texture_buffer_object) { + if (_mesa_has_ARB_texture_buffer_object(ctx) || + _mesa_has_OES_texture_buffer(ctx)) { return &ctx->Texture.BufferObject; } break; diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index b0fadc93aef..88efd3ee642 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -1907,8 +1907,8 @@ tex_binding_to_index(const struct gl_context *ctx, GLenum binding) || _mesa_is_gles3(ctx) ? TEXTURE_2D_ARRAY_INDEX : -1; case GL_TEXTURE_BINDING_BUFFER: - return ctx->API == API_OPENGL_CORE && - ctx->Extensions.ARB_texture_buffer_object ? + return (_mesa_has_ARB_texture_buffer_object(ctx) || + _mesa_has_OES_texture_buffer(ctx)) ? 
TEXTURE_BUFFER_INDEX : -1; case GL_TEXTURE_BINDING_CUBE_MAP_ARRAY: return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_cube_map_array diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 12c21899cb1..62968fc0300 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -535,6 +535,16 @@ descriptor=[ # GL_ARB_gpu_shader5 / GL_OES_geometry_shader [ "MAX_GEOMETRY_SHADER_INVOCATIONS", "CONST(MAX_GEOMETRY_SHADER_INVOCATIONS), extra_ARB_gpu_shader5_or_oes_geometry_shader" ], + +# GL_ARB_texture_buffer_object / GL_OES_texture_buffer + [ "MAX_TEXTURE_BUFFER_SIZE_ARB", "CONTEXT_INT(Const.MaxTextureBufferSize), extra_texture_buffer_object" ], + [ "TEXTURE_BINDING_BUFFER_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_texture_buffer_object" ], + [ "TEXTURE_BUFFER_DATA_STORE_BINDING_ARB", "LOC_CUSTOM, TYPE_INT, TEXTURE_BUFFER_INDEX, extra_texture_buffer_object" ], + [ "TEXTURE_BUFFER_FORMAT_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_texture_buffer_object" ], + [ "TEXTURE_BUFFER_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_texture_buffer_object" ], + +# GL_ARB_texture_buffer_range + [ "TEXTURE_BUFFER_OFFSET_ALIGNMENT", "CONTEXT_INT(Const.TextureBufferOffsetAlignment), extra_ARB_texture_buffer_range" ], ]}, # Remaining enums are only in OpenGL @@ -805,13 +815,6 @@ descriptor=[ # GL_ARB_color_buffer_float [ "RGBA_FLOAT_MODE_ARB", "BUFFER_FIELD(Visual.floatMode, TYPE_BOOLEAN), extra_core_ARB_color_buffer_float_and_new_buffers" ], -# GL_ARB_texture_buffer_object - [ "MAX_TEXTURE_BUFFER_SIZE_ARB", "CONTEXT_INT(Const.MaxTextureBufferSize), extra_texture_buffer_object" ], - [ "TEXTURE_BINDING_BUFFER_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_texture_buffer_object" ], - [ "TEXTURE_BUFFER_DATA_STORE_BINDING_ARB", "LOC_CUSTOM, TYPE_INT, TEXTURE_BUFFER_INDEX, extra_texture_buffer_object" ], - [ "TEXTURE_BUFFER_FORMAT_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_texture_buffer_object" ], - [ "TEXTURE_BUFFER_ARB", "LOC_CUSTOM, TYPE_INT, 0, 
extra_texture_buffer_object" ], - # GL 3.0 [ "CONTEXT_FLAGS", "CONTEXT_INT(Const.ContextFlags), extra_version_30" ], @@ -871,9 +874,6 @@ descriptor=[ # Enums restricted to OpenGL Core profile { "apis": ["GL_CORE"], "params": [ -# GL_ARB_texture_buffer_range - [ "TEXTURE_BUFFER_OFFSET_ALIGNMENT", "CONTEXT_INT(Const.TextureBufferOffsetAlignment), extra_ARB_texture_buffer_range" ], - # GL_ARB_viewport_array [ "MAX_VIEWPORTS", "CONTEXT_INT(Const.MaxViewports), extra_ARB_viewport_array" ], [ "VIEWPORT_SUBPIXEL_BITS", "CONTEXT_INT(Const.ViewportSubpixelBits), extra_ARB_viewport_array" ], diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 09b97c33074..309e574ac32 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -2450,6 +2450,10 @@ const struct function gles3_functions_possible[] = { { "glGetSamplerParameterIivOES", 30, -1 }, { "glGetSamplerParameterIuivOES", 30, -1 }, + /* GL_OES_texture_buffer */ + { "glTexBufferOES", 31, -1 }, + { "glTexBufferRangeOES", 31, -1 }, + { NULL, 0, -1 } }; diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 5af0a2ec198..6ac6fb109d3 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -499,8 +499,8 @@ _mesa_max_texture_levels(struct gl_context *ctx, GLenum target) return ctx->Extensions.ARB_texture_cube_map_array ? ctx->Const.MaxCubeTextureLevels : 0; case GL_TEXTURE_BUFFER: - return ctx->API == API_OPENGL_CORE && - ctx->Extensions.ARB_texture_buffer_object ? 1 : 0; + return (_mesa_has_ARB_texture_buffer_object(ctx) || + _mesa_has_OES_texture_buffer(ctx)) ? 
1 : 0; case GL_TEXTURE_2D_MULTISAMPLE: case GL_PROXY_TEXTURE_2D_MULTISAMPLE: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: @@ -4717,7 +4717,7 @@ _mesa_CompressedTextureSubImage3D(GLuint texture, GLint level, GLint xoffset, static mesa_format get_texbuffer_format(const struct gl_context *ctx, GLenum internalFormat) { - if (ctx->API != API_OPENGL_CORE) { + if (ctx->API == API_OPENGL_COMPAT) { switch (internalFormat) { case GL_ALPHA8: return MESA_FORMAT_A_UNORM8; @@ -4804,8 +4804,8 @@ get_texbuffer_format(const struct gl_context *ctx, GLenum internalFormat) } } - if (ctx->API == API_OPENGL_CORE && - ctx->Extensions.ARB_texture_buffer_object_rgb32) { + if (_mesa_has_ARB_texture_buffer_object_rgb32(ctx) || + _mesa_has_OES_texture_buffer(ctx)) { switch (internalFormat) { case GL_RGB32F: return MESA_FORMAT_RGB_FLOAT32; @@ -4822,6 +4822,8 @@ get_texbuffer_format(const struct gl_context *ctx, GLenum internalFormat) case GL_RGBA8: return MESA_FORMAT_R8G8B8A8_UNORM; case GL_RGBA16: + if (_mesa_is_gles(ctx)) + return MESA_FORMAT_NONE; return MESA_FORMAT_RGBA_UNORM16; case GL_RGBA16F_ARB: return MESA_FORMAT_RGBA_FLOAT16; @@ -4843,6 +4845,8 @@ get_texbuffer_format(const struct gl_context *ctx, GLenum internalFormat) case GL_RG8: return MESA_FORMAT_R8G8_UNORM; case GL_RG16: + if (_mesa_is_gles(ctx)) + return MESA_FORMAT_NONE; return MESA_FORMAT_R16G16_UNORM; case GL_RG16F: return MESA_FORMAT_RG_FLOAT16; @@ -4864,6 +4868,8 @@ get_texbuffer_format(const struct gl_context *ctx, GLenum internalFormat) case GL_R8: return MESA_FORMAT_R_UNORM8; case GL_R16: + if (_mesa_is_gles(ctx)) + return MESA_FORMAT_NONE; return MESA_FORMAT_R_UNORM16; case GL_R16F: return MESA_FORMAT_R_FLOAT16; @@ -4941,8 +4947,8 @@ _mesa_texture_buffer_range(struct gl_context *ctx, /* NOTE: ARB_texture_buffer_object has interactions with * the compatibility profile that are not implemented. 
*/ - if (!(ctx->API == API_OPENGL_CORE && - ctx->Extensions.ARB_texture_buffer_object)) { + if (!_mesa_has_ARB_texture_buffer_object(ctx) && + !_mesa_has_OES_texture_buffer(ctx)) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(ARB_texture_buffer_object is not" " implemented for the compatibility profile)", caller); diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index d8407f04340..c9502bda236 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -204,8 +204,8 @@ _mesa_get_current_tex_object(struct gl_context *ctx, GLenum target) case GL_PROXY_TEXTURE_2D_ARRAY_EXT: return arrayTex ? ctx->Texture.ProxyTex[TEXTURE_2D_ARRAY_INDEX] : NULL; case GL_TEXTURE_BUFFER: - return ctx->API == API_OPENGL_CORE && - ctx->Extensions.ARB_texture_buffer_object ? + return (_mesa_has_ARB_texture_buffer_object(ctx) || + _mesa_has_OES_texture_buffer(ctx)) ? texUnit->CurrentTex[TEXTURE_BUFFER_INDEX] : NULL; case GL_TEXTURE_EXTERNAL_OES: return _mesa_is_gles(ctx) && ctx->Extensions.OES_EGL_image_external @@ -1574,8 +1574,8 @@ _mesa_tex_target_to_index(const struct gl_context *ctx, GLenum target) || _mesa_is_gles3(ctx) ? TEXTURE_2D_ARRAY_INDEX : -1; case GL_TEXTURE_BUFFER: - return ctx->API == API_OPENGL_CORE && - ctx->Extensions.ARB_texture_buffer_object ? + return (_mesa_has_ARB_texture_buffer_object(ctx) || + _mesa_has_OES_texture_buffer(ctx)) ? 
TEXTURE_BUFFER_INDEX : -1; case GL_TEXTURE_EXTERNAL_OES: return _mesa_is_gles(ctx) && ctx->Extensions.OES_EGL_image_external diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 8a3e02f0552..ba83f8fda9a 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1223,6 +1223,26 @@ _mesa_legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target case GL_TEXTURE_2D_MULTISAMPLE: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: return ctx->Extensions.ARB_texture_multisample; + case GL_TEXTURE_BUFFER: + /* GetTexLevelParameter accepts GL_TEXTURE_BUFFER in GL 3.1+ contexts, + * but not in earlier versions that expose ARB_texture_buffer_object. + * + * From the ARB_texture_buffer_object spec: + * "(7) Do buffer textures support texture parameters (TexParameter) or + * queries (GetTexParameter, GetTexLevelParameter, GetTexImage)? + * + * RESOLVED: No. [...] Note that the spec edits above don't add + * explicit error language for any of these cases. That is because + * each of the functions enumerate the set of valid + * parameters. Not editing the spec to allow TEXTURE_BUFFER_ARB in + * these cases means that target is not legal, and an INVALID_ENUM + * error should be generated." + * + * From the OpenGL 3.1 spec: + * "target may also be TEXTURE_BUFFER, indicating the texture buffer." + */ + return (ctx->API == API_OPENGL_CORE && ctx->Version >= 31) || + _mesa_has_OES_texture_buffer(ctx); } if (!_mesa_is_desktop_gl(ctx)) @@ -1247,25 +1267,6 @@ _mesa_legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target case GL_PROXY_TEXTURE_1D_ARRAY_EXT: case GL_PROXY_TEXTURE_2D_ARRAY_EXT: return ctx->Extensions.EXT_texture_array; - case GL_TEXTURE_BUFFER: - /* GetTexLevelParameter accepts GL_TEXTURE_BUFFER in GL 3.1+ contexts, - * but not in earlier versions that expose ARB_texture_buffer_object. 
- * - * From the ARB_texture_buffer_object spec: - * "(7) Do buffer textures support texture parameters (TexParameter) or - * queries (GetTexParameter, GetTexLevelParameter, GetTexImage)? - * - * RESOLVED: No. [...] Note that the spec edits above don't add - * explicit error language for any of these cases. That is because - * each of the functions enumerate the set of valid - * parameters. Not editing the spec to allow TEXTURE_BUFFER_ARB in - * these cases means that target is not legal, and an INVALID_ENUM - * error should be generated." - * - * From the OpenGL 3.1 spec: - * "target may also be TEXTURE_BUFFER, indicating the texture buffer." - */ - return ctx->API == API_OPENGL_CORE && ctx->Version >= 31; case GL_PROXY_TEXTURE_2D_MULTISAMPLE: case GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY: return ctx->Extensions.ARB_texture_multisample; -- cgit v1.2.3 From 9286cbdd1e2b0ef32db61f0fb4915ea2948ad4cd Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 28 Mar 2016 20:59:13 -0400 Subject: st/mesa: enable OES_texture_buffer when all components available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OES_texture_buffer combines bits from a number of desktop extensions. When they're all available, turn it on. 
Signed-off-by: Ilia Mirkin Reviewed-by: Marek Olšák --- src/mesa/state_tracker/st_extensions.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 44d93e30b4d..36a12010c23 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -1016,6 +1016,12 @@ void st_init_extensions(struct pipe_screen *screen, PIPE_BIND_SAMPLER_VIEW); } + extensions->OES_texture_buffer = + extensions->ARB_texture_buffer_object && + extensions->ARB_texture_buffer_range && + extensions->ARB_texture_buffer_object_rgb32 && + extensions->ARB_shader_image_load_store; + /* Unpacking a varying in the fragment shader costs 1 texture indirection. * If the number of available texture indirections is very limited, then we * prefer to disable varying packing rather than run the risk of varying -- cgit v1.2.3 From cc68dc2b5e3c603580f70d682b0772d179b28ce3 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 14 Jan 2016 16:46:57 +0100 Subject: st/mesa: implement new DMA-buf based VDPAU interop v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid using internal structures from another API. v2: rebased and moved includes so they don't cause problems when VDPAU isn't installed.
Signed-off-by: Christian König Reviewed-by: Marek Olšák (v1) Reviewed-by: Leo Liu --- src/mesa/state_tracker/st_vdpau.c | 181 +++++++++++++++++++++++++++----------- 1 file changed, 132 insertions(+), 49 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_vdpau.c b/src/mesa/state_tracker/st_vdpau.c index 71dd15bc4fe..b9abebfc7bf 100644 --- a/src/mesa/state_tracker/st_vdpau.c +++ b/src/mesa/state_tracker/st_vdpau.c @@ -39,8 +39,6 @@ #include "pipe/p_state.h" #include "pipe/p_video_codec.h" -#include "state_tracker/vdpau_interop.h" - #include "util/u_inlines.h" #include "st_vdpau.h" @@ -51,70 +49,155 @@ #ifdef HAVE_ST_VDPAU +#include "state_tracker/vdpau_interop.h" +#include "state_tracker/vdpau_dmabuf.h" +#include "state_tracker/vdpau_funcs.h" +#include "state_tracker/drm_driver.h" + +static struct pipe_resource * +st_vdpau_video_surface_gallium(struct gl_context *ctx, const GLvoid *vdpSurface, + GLuint index) +{ + int (*getProcAddr)(uint32_t device, uint32_t id, void **ptr); + uint32_t device = (uintptr_t)ctx->vdpDevice; + struct pipe_sampler_view *sv; + VdpVideoSurfaceGallium *f; + + struct pipe_video_buffer *buffer; + struct pipe_sampler_view **samplers; + + getProcAddr = (void *)ctx->vdpGetProcAddress; + if (getProcAddr(device, VDP_FUNC_ID_VIDEO_SURFACE_GALLIUM, (void**)&f)) + return NULL; + + buffer = f((uintptr_t)vdpSurface); + if (!buffer) + return NULL; + + samplers = buffer->get_sampler_view_planes(buffer); + if (!samplers) + return NULL; + + sv = samplers[index >> 1]; + if (!sv) + return NULL; + + return sv->texture; +} + +static struct pipe_resource * +st_vdpau_output_surface_gallium(struct gl_context *ctx, const GLvoid *vdpSurface) +{ + int (*getProcAddr)(uint32_t device, uint32_t id, void **ptr); + uint32_t device = (uintptr_t)ctx->vdpDevice; + VdpOutputSurfaceGallium *f; + + getProcAddr = (void *)ctx->vdpGetProcAddress; + if (getProcAddr(device, VDP_FUNC_ID_OUTPUT_SURFACE_GALLIUM, (void**)&f)) + return NULL; + + return 
f((uintptr_t)vdpSurface); +} + +static struct pipe_resource * +st_vdpau_resource_from_description(struct gl_context *ctx, + const struct VdpSurfaceDMABufDesc *desc) +{ + struct st_context *st = st_context(ctx); + struct pipe_resource templ, *res; + struct winsys_handle whandle; + + if (desc->handle == -1) + return NULL; + + memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D; + templ.last_level = 0; + templ.depth0 = 1; + templ.array_size = 1; + templ.width0 = desc->width; + templ.height0 = desc->height; + templ.format = VdpFormatRGBAToPipe(desc->format); + templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; + templ.usage = PIPE_USAGE_DEFAULT; + + memset(&whandle, 0, sizeof(whandle)); + whandle.type = DRM_API_HANDLE_TYPE_FD; + whandle.handle = desc->handle; + whandle.offset = desc->offset; + whandle.stride = desc->stride; + + res = st->pipe->screen->resource_from_handle(st->pipe->screen, &templ, &whandle, + PIPE_HANDLE_USAGE_READ_WRITE); + close(desc->handle); + + return res; +} + +static struct pipe_resource * +st_vdpau_output_surface_dma_buf(struct gl_context *ctx, const GLvoid *vdpSurface) +{ + int (*getProcAddr)(uint32_t device, uint32_t id, void **ptr); + uint32_t device = (uintptr_t)ctx->vdpDevice; + + struct VdpSurfaceDMABufDesc desc; + VdpOutputSurfaceDMABuf *f; + + getProcAddr = (void *)ctx->vdpGetProcAddress; + if (getProcAddr(device, VDP_FUNC_ID_OUTPUT_SURFACE_DMA_BUF, (void**)&f)) + return NULL; + + if (f((uintptr_t)vdpSurface, &desc) != VDP_STATUS_OK) + return NULL; + + return st_vdpau_resource_from_description(ctx, &desc); +} + +static struct pipe_resource * +st_vdpau_video_surface_dma_buf(struct gl_context *ctx, const GLvoid *vdpSurface, + GLuint index) +{ + int (*getProcAddr)(uint32_t device, uint32_t id, void **ptr); + uint32_t device = (uintptr_t)ctx->vdpDevice; + + struct VdpSurfaceDMABufDesc desc; + VdpVideoSurfaceDMABuf *f; + + getProcAddr = (void *)ctx->vdpGetProcAddress; + if (getProcAddr(device, 
VDP_FUNC_ID_VIDEO_SURFACE_DMA_BUF, (void**)&f)) + return NULL; + + if (f((uintptr_t)vdpSurface, index, &desc) != VDP_STATUS_OK) + return NULL; + + return st_vdpau_resource_from_description(ctx, &desc); +} + static void st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access, GLboolean output, struct gl_texture_object *texObj, struct gl_texture_image *texImage, const GLvoid *vdpSurface, GLuint index) { - int (*getProcAddr)(uint32_t device, uint32_t id, void **ptr); - uint32_t device = (uintptr_t)ctx->vdpDevice; - struct st_context *st = st_context(ctx); struct st_texture_object *stObj = st_texture_object(texObj); struct st_texture_image *stImage = st_texture_image(texImage); - + struct pipe_resource *res; struct pipe_sampler_view templ, **sampler_view; mesa_format texFormat; - getProcAddr = (void *)ctx->vdpGetProcAddress; if (output) { - VdpOutputSurfaceGallium *f; - - if (getProcAddr(device, VDP_FUNC_ID_OUTPUT_SURFACE_GALLIUM, (void**)&f)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "VDPAUMapSurfacesNV"); - return; - } - - res = f((uintptr_t)vdpSurface); + res = st_vdpau_output_surface_dma_buf(ctx, vdpSurface); - if (!res) { - _mesa_error(ctx, GL_INVALID_OPERATION, "VDPAUMapSurfacesNV"); - return; - } + if (!res) + res = st_vdpau_output_surface_gallium(ctx, vdpSurface); } else { - struct pipe_sampler_view *sv; - VdpVideoSurfaceGallium *f; - - struct pipe_video_buffer *buffer; - struct pipe_sampler_view **samplers; - - if (getProcAddr(device, VDP_FUNC_ID_VIDEO_SURFACE_GALLIUM, (void**)&f)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "VDPAUMapSurfacesNV"); - return; - } - - buffer = f((uintptr_t)vdpSurface); - if (!buffer) { - _mesa_error(ctx, GL_INVALID_OPERATION, "VDPAUMapSurfacesNV"); - return; - } - - samplers = buffer->get_sampler_view_planes(buffer); - if (!samplers) { - _mesa_error(ctx, GL_INVALID_OPERATION, "VDPAUMapSurfacesNV"); - return; - } - - sv = samplers[index >> 1]; - if (!sv) { - _mesa_error(ctx, GL_INVALID_OPERATION, 
"VDPAUMapSurfacesNV"); - return; - } - - res = sv->texture; + res = st_vdpau_video_surface_dma_buf(ctx, vdpSurface, index); + + if (!res) + res = st_vdpau_video_surface_gallium(ctx, vdpSurface, index); } if (!res) { -- cgit v1.2.3 From f8c69fbb5491c8790dd3bcf991f06151d15d92b9 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 29 Mar 2016 15:35:07 -0700 Subject: Revert "i965: Set address rounding bits for GL_NEAREST filtering as well." This reverts commit 60d6a8989ab44cf47accee6bc692ba6fb98f6a9f. It's pretty sketchy, and apparently regressed a bunch of dEQP tests on Sandybridge. --- src/mesa/drivers/dri/i965/brw_sampler_state.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c index 7bd21f7aaf0..3bd22c7559f 100644 --- a/src/mesa/drivers/dri/i965/brw_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c @@ -436,17 +436,14 @@ brw_update_sampler_state(struct brw_context *brw, } } - /* Set address rounding bits. The conditions are empirically - * derived in order to pass test cases. - */ - bool round_nearest = brw->gen >= 6 && target != GL_TEXTURE_3D; + /* Set address rounding bits if not using nearest filtering. 
*/ unsigned address_rounding = 0; - if (min_filter != BRW_MAPFILTER_NEAREST || round_nearest) { + if (min_filter != BRW_MAPFILTER_NEAREST) { address_rounding |= BRW_ADDRESS_ROUNDING_ENABLE_U_MIN | BRW_ADDRESS_ROUNDING_ENABLE_V_MIN | BRW_ADDRESS_ROUNDING_ENABLE_R_MIN; } - if (mag_filter != BRW_MAPFILTER_NEAREST || round_nearest) { + if (mag_filter != BRW_MAPFILTER_NEAREST) { address_rounding |= BRW_ADDRESS_ROUNDING_ENABLE_U_MAG | BRW_ADDRESS_ROUNDING_ENABLE_V_MAG | BRW_ADDRESS_ROUNDING_ENABLE_R_MAG; -- cgit v1.2.3 From d4a5a61d445e683c20de00c1febe847b4c2db910 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 28 Mar 2016 20:07:13 -0700 Subject: i965: Don't use CUBE wrap modes for integer formats on IVB/BYT. There is no linear filtering for integer formats, so we should always be using CLAMP_TO_EDGE mode. Fixes 46 dEQP cases on Ivybridge (which were likely broken by commit 0faf26e6a0a34c3544644852802484f2404cc83e). This workaround doesn't appear to be necessary on any other hardware; I haven't found any documentation mentioning errata in this area. v2: Only apply on Ivybridge/Baytrail to avoid regressing GLES3.1 tests. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand [v1] --- src/mesa/drivers/dri/i965/brw_sampler_state.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c index 3bd22c7559f..1dc7d71929c 100644 --- a/src/mesa/drivers/dri/i965/brw_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c @@ -459,8 +459,12 @@ brw_update_sampler_state(struct brw_context *brw, target == GL_TEXTURE_CUBE_MAP_ARRAY) { /* Cube maps must use the same wrap mode for all three coordinate * dimensions. Prior to Haswell, only CUBE and CLAMP are valid. + * + * Ivybridge and Baytrail seem to have problems with CUBE mode and + * integer formats. Fall back to CLAMP for now. 
*/ - if (tex_cube_map_seamless || sampler->CubeMapSeamless) { + if ((tex_cube_map_seamless || sampler->CubeMapSeamless) && + !(brw->gen == 7 && !brw->is_haswell && is_integer_format)) { wrap_s = BRW_TEXCOORDMODE_CUBE; wrap_t = BRW_TEXCOORDMODE_CUBE; wrap_r = BRW_TEXCOORDMODE_CUBE; -- cgit v1.2.3 From 105fe527840ed5fc55e78c4561915fbe1fded76c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 24 Mar 2016 11:24:33 -0600 Subject: mesa: new _mesa_prepare_mipmap_levels() function for mipmap generation Simplifies the loops in generate_mipmap_uncompressed() and generate_mipmap_compressed(). Will be used in the state tracker too. Could probably be used in the meta code. If so, some additional clean-ups can be done after that. v2: use unsigned types instead of GLuint, per Ian Reviewed-by: Roland Scheidegger Reviewed-by: Jose Fonseca --- src/mesa/main/mipmap.c | 88 ++++++++++++++++++++++++++++++++------------------ src/mesa/main/mipmap.h | 5 +++ 2 files changed, 62 insertions(+), 31 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index 5a02780b960..cb9afdef2af 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -1872,6 +1872,49 @@ _mesa_prepare_mipmap_level(struct gl_context *ctx, } +/** + * Prepare all mipmap levels beyond 'baseLevel' for mipmap generation. + * When finished, all the gl_texture_image structures for the smaller + * mipmap levels will be consistent with the base level (in terms of + * dimensions, format, etc). 
+ */ +void +_mesa_prepare_mipmap_levels(struct gl_context *ctx, + struct gl_texture_object *texObj, + unsigned baseLevel, unsigned maxLevel) +{ + const struct gl_texture_image *baseImage = + _mesa_select_tex_image(texObj, texObj->Target, baseLevel); + const GLint border = 0; + GLint width = baseImage->Width; + GLint height = baseImage->Height; + GLint depth = baseImage->Depth; + const GLenum intFormat = baseImage->InternalFormat; + const mesa_format texFormat = baseImage->TexFormat; + GLint newWidth, newHeight, newDepth; + + /* Prepare baseLevel + 1, baseLevel + 2, ... */ + for (unsigned level = baseLevel + 1; level <= maxLevel; level++) { + if (!_mesa_next_mipmap_level_size(texObj->Target, border, + width, height, depth, + &newWidth, &newHeight, &newDepth)) { + /* all done */ + break; + } + + if (!_mesa_prepare_mipmap_level(ctx, texObj, level, + newWidth, newHeight, newDepth, + border, intFormat, texFormat)) { + break; + } + + width = newWidth; + height = newHeight; + depth = newDepth; + } +} + + static void generate_mipmap_uncompressed(struct gl_context *ctx, GLenum target, struct gl_texture_object *texObj, @@ -1892,7 +1935,6 @@ generate_mipmap_uncompressed(struct gl_context *ctx, GLenum target, GLint dstWidth, dstHeight, dstDepth; GLint border; GLint slice; - GLboolean nextLevel; GLubyte **srcMaps, **dstMaps; GLboolean success = GL_TRUE; @@ -1904,22 +1946,14 @@ generate_mipmap_uncompressed(struct gl_context *ctx, GLenum target, srcDepth = srcImage->Depth; border = srcImage->Border; - nextLevel = _mesa_next_mipmap_level_size(target, border, - srcWidth, srcHeight, srcDepth, - &dstWidth, &dstHeight, &dstDepth); - if (!nextLevel) - return; - - if (!_mesa_prepare_mipmap_level(ctx, texObj, level + 1, - dstWidth, dstHeight, dstDepth, - border, srcImage->InternalFormat, - srcImage->TexFormat)) { - return; - } - /* get dest gl_texture_image */ dstImage = _mesa_select_tex_image(texObj, target, level + 1); - assert(dstImage); + if (!dstImage) { + break; + } + dstWidth = 
dstImage->Width; + dstHeight = dstImage->Height; + dstDepth = dstImage->Depth; if (target == GL_TEXTURE_1D_ARRAY) { srcDepth = srcHeight; @@ -2087,7 +2121,6 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target, GLint srcWidth, srcHeight, srcDepth; GLint dstWidth, dstHeight, dstDepth; GLint border; - GLboolean nextLevel; GLuint temp_dst_row_stride, temp_dst_img_stride; /* in bytes */ GLint i; @@ -2099,23 +2132,14 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target, srcDepth = srcImage->Depth; border = srcImage->Border; - nextLevel = _mesa_next_mipmap_level_size(target, border, - srcWidth, srcHeight, srcDepth, - &dstWidth, &dstHeight, &dstDepth); - if (!nextLevel) - goto end; - - if (!_mesa_prepare_mipmap_level(ctx, texObj, level + 1, - dstWidth, dstHeight, dstDepth, - border, srcImage->InternalFormat, - srcImage->TexFormat)) { - /* all done */ - goto end; - } - /* get dest gl_texture_image */ dstImage = _mesa_select_tex_image(texObj, target, level + 1); - assert(dstImage); + if (!dstImage) { + break; + } + dstWidth = dstImage->Width; + dstHeight = dstImage->Height; + dstDepth = dstImage->Depth; /* Compute dst image strides and alloc memory on first iteration */ temp_dst_row_stride = _mesa_format_row_stride(temp_format, dstWidth); @@ -2194,6 +2218,8 @@ _mesa_generate_mipmap(struct gl_context *ctx, GLenum target, maxLevel = MIN2(maxLevel, texObj->MaxLevel); + _mesa_prepare_mipmap_levels(ctx, texObj, texObj->BaseLevel, maxLevel); + if (_mesa_is_format_compressed(srcImage->TexFormat)) { generate_mipmap_compressed(ctx, target, texObj, srcImage, maxLevel); } else { diff --git a/src/mesa/main/mipmap.h b/src/mesa/main/mipmap.h index c0366d329a2..33913e88417 100644 --- a/src/mesa/main/mipmap.h +++ b/src/mesa/main/mipmap.h @@ -47,6 +47,11 @@ _mesa_prepare_mipmap_level(struct gl_context *ctx, GLsizei width, GLsizei height, GLsizei depth, GLsizei border, GLenum intFormat, mesa_format format); +void +_mesa_prepare_mipmap_levels(struct gl_context 
*ctx, + struct gl_texture_object *texObj, + unsigned baseLevel, unsigned maxLevel); + extern void _mesa_generate_mipmap(struct gl_context *ctx, GLenum target, struct gl_texture_object *texObj); -- cgit v1.2.3 From d8d029f22bcb49c26ddb11d0bb968d24f2a5fb7e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 24 Mar 2016 14:09:24 -0600 Subject: st/mesa: simplify st_generate_mipmap() The whole st_generate_mipmap() function was overly complicated. Now we just call the new _mesa_prepare_mipmap_levels() function to prepare the texture mipmap memory, then call the generate function which fills in the texture images. This fixes a failed assertion in llvmpipe/softpipe which is hit with the new piglit generatemipmap-base-change test. Also fixes some device errors (format mismatches) with the VMware svga driver. v2: fix a comment typo, per Sinclair Reviewed-by: Sinclair Yeh Reviewed-by: Jose Fonseca Reviewed-by: Roland Scheidegger --- src/mesa/state_tracker/st_gen_mipmap.c | 102 ++++++++------------------------- 1 file changed, 24 insertions(+), 78 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index c4b3492b0d3..a14bbfabaa3 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -82,7 +82,6 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target, const uint baseLevel = texObj->BaseLevel; enum pipe_format format; uint lastLevel, first_layer, last_layer; - uint dstLevel; if (!pt) return; @@ -103,42 +102,33 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target, stObj->lastLevel = lastLevel; if (!texObj->Immutable) { - if (pt->last_level < lastLevel) { - /* The current gallium texture doesn't have space for all the - * mipmap levels we need to generate. So allocate a new texture. 
- */ - struct pipe_resource *oldTex = stObj->pt; - - /* create new texture with space for more levels */ - stObj->pt = st_texture_create(st, - oldTex->target, - oldTex->format, - lastLevel, - oldTex->width0, - oldTex->height0, - oldTex->depth0, - oldTex->array_size, - 0, - oldTex->bind); - - /* This will copy the old texture's base image into the new texture - * which we just allocated. - */ - st_finalize_texture(ctx, st->pipe, texObj); - - /* release the old tex (will likely be freed too) */ - pipe_resource_reference(&oldTex, NULL); - st_texture_release_all_sampler_views(st, stObj); - } - else { - /* Make sure that the base texture image data is present in the - * texture buffer. - */ - st_finalize_texture(ctx, st->pipe, texObj); - } + const GLboolean genSave = texObj->GenerateMipmap; + + /* Temporarily set GenerateMipmap to true so that allocate_full_mipmap() + * makes the right decision about full mipmap allocation. + */ + texObj->GenerateMipmap = GL_TRUE; + + _mesa_prepare_mipmap_levels(ctx, texObj, baseLevel, lastLevel); + + texObj->GenerateMipmap = genSave; + + /* At this point, memory for all the texture levels has been + * allocated. However, the base level image may be in one resource + * while the subsequent/smaller levels may be in another resource. + * Finalizing the texture will copy the base images from the former + * resource to the latter. + * + * After this, we'll have all mipmap levels in one resource. 
+ */ + st_finalize_texture(ctx, st->pipe, texObj); } pt = stObj->pt; + if (!pt) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "mipmap generation"); + return; + } assert(pt->last_level >= lastLevel); @@ -169,48 +159,4 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target, _mesa_generate_mipmap(ctx, target, texObj); } } - - /* Fill in the Mesa gl_texture_image fields */ - for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { - const uint srcLevel = dstLevel - 1; - const struct gl_texture_image *srcImage - = _mesa_get_tex_image(ctx, texObj, target, srcLevel); - struct gl_texture_image *dstImage; - struct st_texture_image *stImage; - uint border = srcImage->Border; - uint dstWidth, dstHeight, dstDepth; - - dstWidth = u_minify(pt->width0, dstLevel); - if (texObj->Target == GL_TEXTURE_1D_ARRAY) { - dstHeight = pt->array_size; - } - else { - dstHeight = u_minify(pt->height0, dstLevel); - } - if (texObj->Target == GL_TEXTURE_2D_ARRAY || - texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) { - dstDepth = pt->array_size; - } - else { - dstDepth = u_minify(pt->depth0, dstLevel); - } - - dstImage = _mesa_get_tex_image(ctx, texObj, target, dstLevel); - if (!dstImage) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps"); - return; - } - - /* Free old image data */ - ctx->Driver.FreeTextureImageBuffer(ctx, dstImage); - - /* initialize new image */ - _mesa_init_teximage_fields(ctx, dstImage, dstWidth, dstHeight, - dstDepth, border, srcImage->InternalFormat, - srcImage->TexFormat); - - stImage = st_texture_image(dstImage); - - pipe_resource_reference(&stImage->pt, pt); - } } -- cgit v1.2.3 From 75b713455c77931e5e90c30e65ac99d74dfb17f0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sat, 26 Mar 2016 11:35:00 -0600 Subject: xlib: add support for GLX_ARB_create_context This adds the glXCreateContextAttribsARB() function for the xlib/swrast driver. This allows more piglit tests to run with this driver. 
For example, without this patch we get: $ bin/fbo-generatemipmap-1d -auto piglit: error: waffle_config_choose failed due to WAFFLE_ERROR_UNSUPPORTED_ ON_PLATFORM: GLX_ARB_create_context is required in order to request an OpenGL version not equal to the default value 1.0 piglit: error: Failed to create waffle_config for OpenGL 2.0 Compatibility Context piglit: info: Failed to create any GL context PIGLIT: {"result": "skip" } Reviewed-by: Jose Fonseca Acked-by: Roland Scheidegger --- src/mesa/drivers/x11/fakeglx.c | 52 ++++++++++++++++++++++++++++++++++++++++++ src/mesa/drivers/x11/glxapi.c | 20 ++++++++++++++++ src/mesa/drivers/x11/glxapi.h | 5 ++++ 3 files changed, 77 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c index 9286f718d00..80b71765e6c 100644 --- a/src/mesa/drivers/x11/fakeglx.c +++ b/src/mesa/drivers/x11/fakeglx.c @@ -74,6 +74,7 @@ "GLX_MESA_copy_sub_buffer " \ "GLX_MESA_pixmap_colormap " \ "GLX_MESA_release_buffers " \ + "GLX_ARB_create_context " \ "GLX_ARB_get_proc_address " \ "GLX_EXT_texture_from_pixmap " \ "GLX_EXT_visual_info " \ @@ -2831,6 +2832,56 @@ Fake_glXReleaseTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer) } +static GLXContext +Fake_glXCreateContextAttribs(Display *dpy, GLXFBConfig config, + GLXContext share_context, Bool direct, + const int *attrib_list) +{ + XMesaContext xmCtx; + XMesaVisual xmvis = (XMesaVisual) config; + int i; + int major = 0, minor = 0, ctxFlags = 0, profileFlags = 0; + + for (i = 0; attrib_list[i]; i += 2) { + switch (attrib_list[i]) { + case GLX_CONTEXT_MAJOR_VERSION_ARB: + major = attrib_list[i + 1]; + break; + case GLX_CONTEXT_MINOR_VERSION_ARB: + minor = attrib_list[i + 1]; + break; + case GLX_CONTEXT_FLAGS_ARB: + ctxFlags = attrib_list[i + 1]; + break; + case GLX_CONTEXT_PROFILE_MASK_ARB: + profileFlags = attrib_list[i + 1]; + break; + default: + fprintf(stderr, "Bad attribute in glXCreateContextAttribs()\n"); + return 0; + } + } + + 
if (major * 10 + minor > 21) { + /* swrast only supports GL 2.1 and earlier */ + return 0; + } + + /* These are ignored for now. We'd have to enhance XMesaCreateContext + * to take these flags and the version, at least. + */ + (void) ctxFlags; + (void) profileFlags; + + /* deallocate unused windows/buffers */ + XMesaGarbageCollect(dpy); + + xmCtx = XMesaCreateContext(xmvis, (XMesaContext) share_context); + + return (GLXContext) xmCtx; +} + + /* silence warning */ extern struct _glxapi_table *_mesa_GetGLXDispatchTable(void); @@ -2990,5 +3041,6 @@ _mesa_GetGLXDispatchTable(void) glx.BindTexImageEXT = Fake_glXBindTexImageEXT; glx.ReleaseTexImageEXT = Fake_glXReleaseTexImageEXT; + glx.CreateContextAttribs = Fake_glXCreateContextAttribs; return &glx; } diff --git a/src/mesa/drivers/x11/glxapi.c b/src/mesa/drivers/x11/glxapi.c index a870e94ed4a..cc1bb2ab4b3 100644 --- a/src/mesa/drivers/x11/glxapi.c +++ b/src/mesa/drivers/x11/glxapi.c @@ -1319,6 +1319,9 @@ static struct name_address_pair GLX_functions[] = { { "glXBindTexImageEXT", (__GLXextFuncPtr) glXBindTexImageEXT }, { "glXReleaseTexImageEXT", (__GLXextFuncPtr) glXReleaseTexImageEXT }, + /*** GLX_ARB_create_context ***/ + { "glXCreateContextAttribsARB", (__GLXextFuncPtr) glXCreateContextAttribsARB }, + { NULL, NULL } /* end of list */ }; @@ -1370,3 +1373,20 @@ void PUBLIC { return glXGetProcAddressARB(procName); } + + +/** + * Added in GLX_ARB_create_context. 
+ */ +GLXContext PUBLIC +glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config, + GLXContext share_context, Bool direct, + const int *attrib_list) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->CreateContextAttribs)(dpy, config, share_context, direct, + attrib_list); +} diff --git a/src/mesa/drivers/x11/glxapi.h b/src/mesa/drivers/x11/glxapi.h index bd6e97053e6..aff38f7531d 100644 --- a/src/mesa/drivers/x11/glxapi.h +++ b/src/mesa/drivers/x11/glxapi.h @@ -201,6 +201,11 @@ struct _glxapi_table { void (*BindTexImageEXT)(Display *dpy, GLXDrawable drawable, int buffer, const int *attrib_list); void (*ReleaseTexImageEXT)(Display *dpy, GLXDrawable drawable, int buffer); + + /*** GLX_ARB_create_context ***/ + GLXContext (*CreateContextAttribs)(Display *dpy, GLXFBConfig config, + GLXContext share_context, Bool direct, + const int *attrib_list); }; -- cgit v1.2.3 From ed39de90f1cd209b10baeed8ae98b1f56127c8de Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 29 Mar 2016 17:44:00 -0600 Subject: meta: use _mesa_prepare_mipmap_levels() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prepare_mipmap_level() wrapper for _mesa_prepare_mipmap_level() is not needed. It only served to undo the GL_TEXTURE_1D_ARRAY height/depth change that was made before the call to prepare_mipmap_level(). Said another way, regardless of how the meta code manipulates the height/depth dims for GL_TEXTURE_1D_ARRAY, the gl_texture_image dimensions are correctly set up by _mesa_prepare_mipmap_levels(). Tested by plugging _mesa_meta_GenerateMipmap() into the swrast driver and testing with piglit. v2 (idr): Early out of the mipmap generation loop when dstImage is NULL. This can occur for immutable textures that have a limited range of levels or in the presence of memory allocation failures. Fixes arb_texture_view-mipgen on Intel platforms. 
Reviewed-by: José Fonseca Reviewed-by: Roland Scheidegger Reviewed-by: Ian Romanick Tested-by: Ian Romanick Reviewed-by: Brian Paul --- src/mesa/drivers/common/meta_generate_mipmap.c | 32 +++++++------------------- 1 file changed, 8 insertions(+), 24 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c index d4b75390ebf..b81e179e2cd 100644 --- a/src/mesa/drivers/common/meta_generate_mipmap.c +++ b/src/mesa/drivers/common/meta_generate_mipmap.c @@ -137,21 +137,6 @@ _mesa_meta_glsl_generate_mipmap_cleanup(struct gl_context *ctx, _mesa_meta_blit_shader_table_cleanup(ctx, &mipmap->shaders); } -static GLboolean -prepare_mipmap_level(struct gl_context *ctx, - struct gl_texture_object *texObj, GLuint level, - GLsizei width, GLsizei height, GLsizei depth, - GLenum intFormat, mesa_format format) -{ - if (texObj->Target == GL_TEXTURE_1D_ARRAY) { - /* Work around Mesa expecting the number of array slices in "height". */ - height = depth; - depth = 1; - } - - return _mesa_prepare_mipmap_level(ctx, texObj, level, width, height, depth, - 0, intFormat, format); -} /** * Called via ctx->Driver.GenerateMipmap() @@ -270,6 +255,8 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, /* texture is already locked, unlock now */ _mesa_unlock_texture(ctx, texObj); + _mesa_prepare_mipmap_levels(ctx, texObj, baseLevel, maxLevel); + for (dstLevel = baseLevel + 1; dstLevel <= maxLevel; dstLevel++) { const struct gl_texture_image *srcImage; struct gl_texture_image *dstImage; @@ -309,17 +296,14 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, _mesa_texture_parameteriv(ctx, texObj, GL_TEXTURE_MAX_LEVEL, (GLint *) &dstLevel, false); - if (!prepare_mipmap_level(ctx, texObj, dstLevel, - dstWidth, dstHeight, dstDepth, - srcImage->InternalFormat, - srcImage->TexFormat)) { - /* All done. 
We either ran out of memory or we would go beyond the - * last valid level of an immutable texture if we continued. - */ - break; - } dstImage = _mesa_select_tex_image(texObj, faceTarget, dstLevel); + /* All done. We either ran out of memory or we would go beyond the last + * valid level of an immutable texture if we continued. + */ + if (dstImage == NULL) + break; + /* limit minification to src level */ _mesa_texture_parameteriv(ctx, texObj, GL_TEXTURE_MAX_LEVEL, (GLint *) &srcLevel, false); -- cgit v1.2.3 From 513384d7e8db294d54a910f40c90492c211abc54 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 28 Mar 2016 17:27:27 -0600 Subject: mesa: make _mesa_prepare_mipmap_level() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No longer called from any other file. Reviewed-by: José Fonseca Reviewed-by: Roland Scheidegger Reviewed-by: Ian Romanick Tested-by: Ian Romanick --- src/mesa/main/mipmap.c | 16 ++++++++-------- src/mesa/main/mipmap.h | 7 ------- 2 files changed, 8 insertions(+), 15 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index cb9afdef2af..5ff53f4265c 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -1810,11 +1810,11 @@ _mesa_next_mipmap_level_size(GLenum target, GLint border, * for mipmap generation. If not, (re) allocate it. 
* \return GL_TRUE if successful, GL_FALSE if mipmap generation should stop */ -GLboolean -_mesa_prepare_mipmap_level(struct gl_context *ctx, - struct gl_texture_object *texObj, GLuint level, - GLsizei width, GLsizei height, GLsizei depth, - GLsizei border, GLenum intFormat, mesa_format format) +static GLboolean +prepare_mipmap_level(struct gl_context *ctx, + struct gl_texture_object *texObj, GLuint level, + GLsizei width, GLsizei height, GLsizei depth, + GLsizei border, GLenum intFormat, mesa_format format) { const GLuint numFaces = _mesa_num_tex_faces(texObj->Target); GLuint face; @@ -1902,9 +1902,9 @@ _mesa_prepare_mipmap_levels(struct gl_context *ctx, break; } - if (!_mesa_prepare_mipmap_level(ctx, texObj, level, - newWidth, newHeight, newDepth, - border, intFormat, texFormat)) { + if (!prepare_mipmap_level(ctx, texObj, level, + newWidth, newHeight, newDepth, + border, intFormat, texFormat)) { break; } diff --git a/src/mesa/main/mipmap.h b/src/mesa/main/mipmap.h index 33913e88417..d11c7fada37 100644 --- a/src/mesa/main/mipmap.h +++ b/src/mesa/main/mipmap.h @@ -40,13 +40,6 @@ _mesa_generate_mipmap_level(GLenum target, GLubyte **dstData, GLint dstRowStride); - -extern GLboolean -_mesa_prepare_mipmap_level(struct gl_context *ctx, - struct gl_texture_object *texObj, GLuint level, - GLsizei width, GLsizei height, GLsizei depth, - GLsizei border, GLenum intFormat, mesa_format format); - void _mesa_prepare_mipmap_levels(struct gl_context *ctx, struct gl_texture_object *texObj, -- cgit v1.2.3 From 553e37aa337783d468f218291f6de6a74e49289b Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Tue, 29 Mar 2016 15:31:05 -0400 Subject: mesa: allow mutable buffer textures to back GL ES images Since there is no way to create immutable texture buffers in GL ES, mutable buffer textures are allowed to back images. See issue 7 of the GL_OES_texture_buffer specification. 
Signed-off-by: Ilia Mirkin Reviewed-by: Samuel Pitoiset --- src/mesa/main/shaderimage.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c index fd5934f939f..90643c4ed6d 100644 --- a/src/mesa/main/shaderimage.c +++ b/src/mesa/main/shaderimage.c @@ -583,8 +583,13 @@ _mesa_BindImageTexture(GLuint unit, GLuint texture, GLint level, * * "An INVALID_OPERATION error is generated if texture is not the name * of an immutable texture object." + * + * However note that issue 7 of the GL_OES_texture_buffer spec + * recognizes that there is no way to create immutable buffer textures, + * so those are excluded from this requirement. */ - if (_mesa_is_gles(ctx) && !t->Immutable) { + if (_mesa_is_gles(ctx) && !t->Immutable && + t->Target != GL_TEXTURE_BUFFER) { _mesa_error(ctx, GL_INVALID_OPERATION, "glBindImageTexture(!immutable)"); return; -- cgit v1.2.3 From a74fc3fe8ada87e1fedeea86f2d93f736a1217bc Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Tue, 24 Nov 2015 17:17:29 -0800 Subject: i965: Don't inline intel_batchbuffer_require_space(). It's called by the inline intel_batchbuffer_begin() function which itself is used in BEGIN_BATCH. So in sequence of code emitting multiple packets, we have inlined this ~200 byte function multiple times. Making it an out-of-line function presumably improved icache usage. Improves performance of Gl32Batch7 by 3.39898% +/- 0.358674% (n=155) on Ivybridge. 
Reviewed-by: Abdiel Janulgue --- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 26 +++++++++++++++++++++++++ src/mesa/drivers/dri/i965/intel_batchbuffer.h | 28 ++------------------------- 2 files changed, 28 insertions(+), 26 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index f77807472fd..e41f927819e 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -106,6 +106,32 @@ intel_batchbuffer_free(struct brw_context *brw) drm_intel_bo_unreference(brw->batch.bo); } +void +intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz, + enum brw_gpu_ring ring) +{ + /* If we're switching rings, implicitly flush the batch. */ + if (unlikely(ring != brw->batch.ring) && brw->batch.ring != UNKNOWN_RING && + brw->gen >= 6) { + intel_batchbuffer_flush(brw); + } + +#ifdef DEBUG + assert(sz < BATCH_SZ - BATCH_RESERVED); +#endif + if (intel_batchbuffer_space(brw) < sz) + intel_batchbuffer_flush(brw); + + enum brw_gpu_ring prev_ring = brw->batch.ring; + /* The intel_batchbuffer_flush() calls above might have changed + * brw->batch.ring to UNKNOWN_RING, so we need to set it here at the end. 
+ */ + brw->batch.ring = ring; + + if (unlikely(prev_ring == UNKNOWN_RING && ring == RENDER_RING)) + intel_batchbuffer_emit_render_ring_prelude(brw); +} + static void do_batch_dump(struct brw_context *brw) { diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index f47369029a0..aa1dc38babc 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -44,6 +44,8 @@ void intel_batchbuffer_init(struct brw_context *brw); void intel_batchbuffer_free(struct brw_context *brw); void intel_batchbuffer_save_state(struct brw_context *brw); void intel_batchbuffer_reset_to_saved(struct brw_context *brw); +void intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz, + enum brw_gpu_ring ring); int _intel_batchbuffer_flush(struct brw_context *brw, const char *file, int line); @@ -116,32 +118,6 @@ intel_batchbuffer_emit_float(struct brw_context *brw, float f) intel_batchbuffer_emit_dword(brw, float_as_int(f)); } -static inline void -intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz, - enum brw_gpu_ring ring) -{ - /* If we're switching rings, implicitly flush the batch. */ - if (unlikely(ring != brw->batch.ring) && brw->batch.ring != UNKNOWN_RING && - brw->gen >= 6) { - intel_batchbuffer_flush(brw); - } - -#ifdef DEBUG - assert(sz < BATCH_SZ - BATCH_RESERVED); -#endif - if (intel_batchbuffer_space(brw) < sz) - intel_batchbuffer_flush(brw); - - enum brw_gpu_ring prev_ring = brw->batch.ring; - /* The intel_batchbuffer_flush() calls above might have changed - * brw->batch.ring to UNKNOWN_RING, so we need to set it here at the end. 
- */ - brw->batch.ring = ring; - - if (unlikely(prev_ring == UNKNOWN_RING && ring == RENDER_RING)) - intel_batchbuffer_emit_render_ring_prelude(brw); -} - static inline void intel_batchbuffer_begin(struct brw_context *brw, int n, enum brw_gpu_ring ring) { -- cgit v1.2.3 From 258299d87ad932246ae8b1aa979b4a1a398db155 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Thu, 3 Mar 2016 13:20:01 +1100 Subject: glsl: use bitmask of active xfb buffer indices This allows us to print the correct binding point when not all buffers declared in the shader are bound. For example if we use a single buffer: layout(xfb_buffer=2, offset=0) out vec4 v; We now print '2' when the buffer is not bound rather than '0'. Reviewed-by: Dave Airlie --- src/compiler/glsl/link_varyings.cpp | 11 +++++++++-- src/mesa/drivers/dri/i965/gen6_sol.c | 2 +- src/mesa/main/mtypes.h | 6 ++---- src/mesa/main/transformfeedback.c | 36 ++++++++++++++++++++---------------- src/mesa/main/transformfeedback.h | 2 +- 5 files changed, 33 insertions(+), 24 deletions(-) (limited to 'src/mesa') diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index 8aefb7e201d..cb0c8baf8aa 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -872,6 +872,11 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, unsigned num_tfeedback_decls, tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers) { + /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for + * tracking the number of buffers doesn't overflow. 
+ */ + assert(ctx->Const.MaxTransformFeedbackBuffers < 32); + bool separate_attribs_mode = prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS; @@ -904,6 +909,7 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, num_outputs); unsigned num_buffers = 0; + unsigned buffers = 0; if (!has_xfb_qualifiers && separate_attribs_mode) { /* GL_SEPARATE_ATTRIBS */ @@ -913,6 +919,7 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, has_xfb_qualifiers)) return false; + buffers |= 1 << num_buffers; num_buffers++; } } @@ -949,6 +956,7 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, } else { buffer = num_buffers; } + buffers |= 1 << num_buffers; if (!tfeedback_decls[i].store(ctx, prog, &prog->LinkedTransformFeedback, @@ -956,12 +964,11 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, has_xfb_qualifiers)) return false; } - num_buffers++; } assert(prog->LinkedTransformFeedback.NumOutputs == num_outputs); - prog->LinkedTransformFeedback.NumBuffers = num_buffers; + prog->LinkedTransformFeedback.ActiveBuffers = buffers; return true; } diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c index 2f6eadffd2e..08d4e1b52ca 100644 --- a/src/mesa/drivers/dri/i965/gen6_sol.c +++ b/src/mesa/drivers/dri/i965/gen6_sol.c @@ -256,7 +256,7 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, * overflowing any of the buffers currently being used for feedback. */ unsigned max_index - = _mesa_compute_max_transform_feedback_vertices(xfb_obj, + = _mesa_compute_max_transform_feedback_vertices(ctx, xfb_obj, linked_xfb_info); /* Initialize the SVBI 0 register to zero and set the maximum index. 
*/ diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index ae0c8a84dda..02e60626b4a 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1649,10 +1649,8 @@ struct gl_transform_feedback_info { unsigned NumOutputs; - /** - * Number of transform feedback buffers in use by this program. - */ - unsigned NumBuffers; + /* Bitmask of active buffer indices. */ + unsigned ActiveBuffers; struct gl_transform_feedback_output *Outputs; diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index f73a89f6c0f..39ba3dcd7c0 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -347,23 +347,25 @@ compute_transform_feedback_buffer_sizes( * enabled transform feedback buffers without overflowing any of them. */ unsigned -_mesa_compute_max_transform_feedback_vertices( +_mesa_compute_max_transform_feedback_vertices(struct gl_context *ctx, const struct gl_transform_feedback_object *obj, const struct gl_transform_feedback_info *info) { unsigned max_index = 0xffffffff; unsigned i; - for (i = 0; i < info->NumBuffers; ++i) { - unsigned stride = info->BufferStride[i]; - unsigned max_for_this_buffer; + for (i = 0; i < ctx->Const.MaxTransformFeedbackBuffers; i++) { + if ((info->ActiveBuffers >> i) & 1) { + unsigned stride = info->BufferStride[i]; + unsigned max_for_this_buffer; - /* Skip any inactive buffers, which have a stride of 0. */ - if (stride == 0) - continue; + /* Skip any inactive buffers, which have a stride of 0. 
*/ + if (stride == 0) + continue; - max_for_this_buffer = obj->Size[i] / (4 * stride); - max_index = MIN2(max_index, max_for_this_buffer); + max_for_this_buffer = obj->Size[i] / (4 * stride); + max_index = MIN2(max_index, max_for_this_buffer); + } } return max_index; @@ -445,12 +447,14 @@ _mesa_BeginTransformFeedback(GLenum mode) return; } - for (i = 0; i < info->NumBuffers; ++i) { - if (obj->BufferNames[i] == 0) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glBeginTransformFeedback(binding point %d does not have " - "a buffer object bound)", i); - return; + for (i = 0; i < ctx->Const.MaxTransformFeedbackBuffers; i++) { + if ((info->ActiveBuffers >> i) & 1) { + if (obj->BufferNames[i] == 0) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glBeginTransformFeedback(binding point %d does not " + "have a buffer object bound)", i); + return; + } } } @@ -470,7 +474,7 @@ _mesa_BeginTransformFeedback(GLenum mode) * feedback. */ unsigned max_vertices - = _mesa_compute_max_transform_feedback_vertices(obj, info); + = _mesa_compute_max_transform_feedback_vertices(ctx, obj, info); obj->GlesRemainingPrims = max_vertices / vertices_per_prim; } diff --git a/src/mesa/main/transformfeedback.h b/src/mesa/main/transformfeedback.h index eb274ad6540..c83f917a532 100644 --- a/src/mesa/main/transformfeedback.h +++ b/src/mesa/main/transformfeedback.h @@ -50,7 +50,7 @@ extern void _mesa_init_transform_feedback_functions(struct dd_function_table *driver); extern unsigned -_mesa_compute_max_transform_feedback_vertices( +_mesa_compute_max_transform_feedback_vertices( struct gl_context *ctx, const struct gl_transform_feedback_object *obj, const struct gl_transform_feedback_info *info); -- cgit v1.2.3 From cf039a309a36ba537b45f3bfe7e5a154c87d51ad Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Thu, 10 Mar 2016 15:00:00 +1100 Subject: mesa: split transform feedback buffer into its own struct This will be used in a following patch to implement interface query support for 
TRANSFORM_FEEDBACK_BUFFER. Reviewed-by: Dave Airlie --- src/compiler/glsl/link_varyings.cpp | 11 ++++++----- src/mesa/drivers/dri/i965/gen6_sol.c | 4 ++-- src/mesa/drivers/dri/i965/gen7_sol_state.c | 2 +- src/mesa/drivers/dri/i965/gen8_sol_state.c | 8 ++++---- src/mesa/main/mtypes.h | 30 +++++++++++++++++++----------- src/mesa/main/transformfeedback.c | 2 +- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 7 files changed, 34 insertions(+), 25 deletions(-) (limited to 'src/mesa') diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index cb0c8baf8aa..5645f783f3f 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -722,7 +722,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, /* Handle gl_SkipComponents. */ if (this->skip_components) { - info->BufferStride[buffer] += this->skip_components; + info->Buffers[buffer].Stride += this->skip_components; return true; } @@ -734,7 +734,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, * and the buffer mode is INTERLEAVED_ATTRIBS_EXT. 
*/ if (prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS && - info->BufferStride[buffer] + this->num_components() > + info->Buffers[buffer].Stride + this->num_components() > ctx->Const.MaxTransformFeedbackInterleavedComponents) { linker_error(prog, "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS " "limit has been exceeded."); @@ -752,10 +752,11 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, info->Outputs[info->NumOutputs].NumComponents = output_size; info->Outputs[info->NumOutputs].StreamId = stream_id; info->Outputs[info->NumOutputs].OutputBuffer = buffer; - info->Outputs[info->NumOutputs].DstOffset = info->BufferStride[buffer]; + info->Outputs[info->NumOutputs].DstOffset = + info->Buffers[buffer].Stride; ++info->NumOutputs; - info->BufferStride[buffer] += output_size; - info->BufferStream[buffer] = this->stream_id; + info->Buffers[buffer].Stride += output_size; + info->Buffers[buffer].Stream = this->stream_id; num_components -= output_size; location++; location_frac = 0; diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c index 08d4e1b52ca..24bb4b41b1e 100644 --- a/src/mesa/drivers/dri/i965/gen6_sol.c +++ b/src/mesa/drivers/dri/i965/gen6_sol.c @@ -69,13 +69,13 @@ gen6_update_sol_surfaces(struct brw_context *brw) brw, xfb_obj->Buffers[buffer], &brw->gs.base.surf_offset[surf_index], linked_xfb_info->Outputs[i].NumComponents, - linked_xfb_info->BufferStride[buffer], buffer_offset); + linked_xfb_info->Buffers[buffer].Stride, buffer_offset); } else { brw_update_sol_surface( brw, xfb_obj->Buffers[buffer], &brw->ff_gs.surf_offset[surf_index], linked_xfb_info->Outputs[i].NumComponents, - linked_xfb_info->BufferStride[buffer], buffer_offset); + linked_xfb_info->Buffers[buffer].Stride, buffer_offset); } } else { if (!brw->geometry_program) diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c index 8cd2fc4b48a..c44572c3438 100644 --- 
a/src/mesa/drivers/dri/i965/gen7_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c @@ -70,7 +70,7 @@ upload_3dstate_so_buffers(struct brw_context *brw) continue; } - stride = linked_xfb_info->BufferStride[i] * 4; + stride = linked_xfb_info->Buffers[i].Stride * 4; start = xfb_obj->Offset[i]; assert(start % 4 == 0); diff --git a/src/mesa/drivers/dri/i965/gen8_sol_state.c b/src/mesa/drivers/dri/i965/gen8_sol_state.c index 58ead68e90c..f30818031f4 100644 --- a/src/mesa/drivers/dri/i965/gen8_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sol_state.c @@ -139,13 +139,13 @@ gen8_upload_3dstate_streamout(struct brw_context *brw, bool active, /* Set buffer pitches; 0 means unbound. */ if (xfb_obj->Buffers[0]) - dw3 |= linked_xfb_info->BufferStride[0] * 4; + dw3 |= linked_xfb_info->Buffers[0].Stride * 4; if (xfb_obj->Buffers[1]) - dw3 |= (linked_xfb_info->BufferStride[1] * 4) << 16; + dw3 |= (linked_xfb_info->Buffers[1].Stride * 4) << 16; if (xfb_obj->Buffers[2]) - dw4 |= linked_xfb_info->BufferStride[2] * 4; + dw4 |= linked_xfb_info->Buffers[2].Stride * 4; if (xfb_obj->Buffers[3]) - dw4 |= (linked_xfb_info->BufferStride[3] * 4) << 16; + dw4 |= (linked_xfb_info->Buffers[3].Stride * 4) << 16; } BEGIN_BATCH(5); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 02e60626b4a..90c3851e72a 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1644,6 +1644,24 @@ struct gl_transform_feedback_output }; +struct gl_transform_feedback_buffer +{ + unsigned Binding; + + /** + * Total number of components stored in each buffer. This may be used by + * hardware back-ends to determine the correct stride when interleaving + * multiple transform feedback outputs in the same buffer. + */ + unsigned Stride; + + /** + * Which transform feedback stream this buffer binding is associated with. + */ + unsigned Stream; +}; + + /** Post-link transform feedback info. 
*/ struct gl_transform_feedback_info { @@ -1661,17 +1679,7 @@ struct gl_transform_feedback_info struct gl_transform_feedback_varying_info *Varyings; GLint NumVarying; - /** - * Total number of components stored in each buffer. This may be used by - * hardware back-ends to determine the correct stride when interleaving - * multiple transform feedback outputs in the same buffer. - */ - unsigned BufferStride[MAX_FEEDBACK_BUFFERS]; - - /** - * Which transform feedback stream this buffer binding is associated with. - */ - unsigned BufferStream[MAX_FEEDBACK_BUFFERS]; + struct gl_transform_feedback_buffer Buffers[MAX_FEEDBACK_BUFFERS]; }; diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index 39ba3dcd7c0..c92f0ccd5a5 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -356,7 +356,7 @@ _mesa_compute_max_transform_feedback_vertices(struct gl_context *ctx, for (i = 0; i < ctx->Const.MaxTransformFeedbackBuffers; i++) { if ((info->ActiveBuffers >> i) & 1) { - unsigned stride = info->BufferStride[i]; + unsigned stride = info->Buffers[i].Stride; unsigned max_for_this_buffer; /* Skip any inactive buffers, which have a stride of 0. 
*/ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 06b4bb41a9b..5e18e8be029 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -6861,7 +6861,7 @@ st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, } for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { - so->stride[i] = info->BufferStride[i]; + so->stride[i] = info->Buffers[i].Stride; } so->num_outputs = info->NumOutputs; } -- cgit v1.2.3 From 8120e869b1cde7fd1a3679291782f2f50296cb45 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Fri, 11 Mar 2016 11:57:52 +1100 Subject: glsl: validate global out xfb_stride qualifiers and set stride on empty buffers Here we use the built-in validation in ast_layout_expression::process_qualifier_constant() to check for mismatching global out strides on buffers in a single shader. From the ARB_enhanced_layouts spec: "While *xfb_stride* can be declared multiple times for the same buffer, it is a compile-time or link-time error to have different values specified for the stride for the same buffer." For intrastage validation a new helper link_xfb_stride_layout_qualifiers() is created. We also take this opportunity to make sure stride is at least a multiple of 4, we will validate doubles at a later stage. From the ARB_enhanced_layouts spec: "If the buffer is capturing any double-typed outputs, the stride must be a multiple of 8, otherwise it must be a multiple of 4, or a compile-time or link-time error results." Finally we update store_tfeedback_info() to apply the strides to LinkedTransformFeedback and update the buffers bitmask to mark any global buffers with a stride as active. For example a shader with: layout (xfb_buffer = 0, xfb_offset = 0) out vec4 gs_fs; layout (xfb_buffer = 1, xfb_stride = 64) out; Is expected to have a buffer bound to both 0 and 1. 
From the ARB_enhanced_layouts spec: "A binding point requires a bound buffer object if and only if its associated stride in the program object used for transform feedback primitive capture is non-zero." Reviewed-by: Dave Airlie --- src/compiler/glsl/glsl_parser_extras.cpp | 11 ++++++ src/compiler/glsl/link_varyings.cpp | 11 ++++++ src/compiler/glsl/linker.cpp | 65 ++++++++++++++++++++++++++++++++ src/mesa/main/mtypes.h | 7 ++++ 4 files changed, 94 insertions(+) (limited to 'src/mesa') diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index b88b6220513..0ce89ceb3a8 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -1617,6 +1617,17 @@ set_shader_inout_layout(struct gl_shader *shader, assert(!state->fs_early_fragment_tests); } + for (unsigned i = 0; i < MAX_FEEDBACK_BUFFERS; i++) { + if (state->out_qualifier->out_xfb_stride[i]) { + unsigned xfb_stride; + if (state->out_qualifier->out_xfb_stride[i]-> + process_qualifier_constant(state, "xfb_stride", &xfb_stride, + true)) { + shader->TransformFeedback.BufferStride[i] = xfb_stride; + } + } + } + switch (shader->Stage) { case MESA_SHADER_TESS_CTRL: shader->TessCtrl.VerticesOut = 0; diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index 5645f783f3f..d91642dea5c 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -930,6 +930,17 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, unsigned buffer = num_tfeedback_decls ? 
tfeedback_decls[0].get_buffer() : 0; + /* Apply any xfb_stride global qualifiers */ + if (has_xfb_qualifiers) { + for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) { + if (prog->TransformFeedback.BufferStride[j]) { + buffers |= 1 << j; + prog->LinkedTransformFeedback.Buffers[j].Stride = + prog->TransformFeedback.BufferStride[j] / 4; + } + } + } + for (unsigned i = 0; i < num_tfeedback_decls; ++i) { if (tfeedback_decls[i].is_next_buffer_separator()) { num_buffers++; diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index 3ae958cacb7..4f191c5002d 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -1584,6 +1584,69 @@ private: hash_table *unnamed_interfaces; }; +/** + * Check for conflicting xfb_stride default qualifiers and store buffer stride + * for later use. + */ +static void +link_xfb_stride_layout_qualifiers(struct gl_context *ctx, + struct gl_shader_program *prog, + struct gl_shader *linked_shader, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + for (unsigned i = 0; i < MAX_FEEDBACK_BUFFERS; i++) { + linked_shader->TransformFeedback.BufferStride[i] = 0; + } + + for (unsigned i = 0; i < num_shaders; i++) { + struct gl_shader *shader = shader_list[i]; + + for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) { + if (shader->TransformFeedback.BufferStride[j]) { + if (linked_shader->TransformFeedback.BufferStride[j] != 0 && + shader->TransformFeedback.BufferStride[j] != 0 && + linked_shader->TransformFeedback.BufferStride[j] != + shader->TransformFeedback.BufferStride[j]) { + linker_error(prog, + "intrastage shaders defined with conflicting " + "xfb_stride for buffer %d (%d and %d)\n", j, + linked_shader->TransformFeedback.BufferStride[j], + shader->TransformFeedback.BufferStride[j]); + return; + } + + if (shader->TransformFeedback.BufferStride[j]) + linked_shader->TransformFeedback.BufferStride[j] = + shader->TransformFeedback.BufferStride[j]; + } + } + } + + for (unsigned j = 0; j < 
MAX_FEEDBACK_BUFFERS; j++) { + if (linked_shader->TransformFeedback.BufferStride[j]) { + prog->TransformFeedback.BufferStride[j] = + linked_shader->TransformFeedback.BufferStride[j]; + + /* We will validate doubles at a later stage */ + if (prog->TransformFeedback.BufferStride[j] % 4) { + linker_error(prog, "invalid qualifier xfb_stride=%d must be a " + "multiple of 4 or if its applied to a type that is " + "or contains a double a multiple of 8.", + prog->TransformFeedback.BufferStride[j]); + return; + } + + if (prog->TransformFeedback.BufferStride[j] / 4 > + ctx->Const.MaxTransformFeedbackInterleavedComponents) { + linker_error(prog, + "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS " + "limit has been exceeded."); + return; + } + } + } +} /** * Performs the cross-validation of tessellation control shader vertices and @@ -2101,6 +2164,8 @@ link_intrastage_shaders(void *mem_ctx, link_tes_in_layout_qualifiers(prog, linked, shader_list, num_shaders); link_gs_inout_layout_qualifiers(prog, linked, shader_list, num_shaders); link_cs_input_layout_qualifiers(prog, linked, shader_list, num_shaders); + link_xfb_stride_layout_qualifiers(ctx, prog, linked, shader_list, + num_shaders); populate_symbol_table(linked); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 90c3851e72a..1e15b69ab70 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2337,6 +2337,11 @@ struct gl_shader bool origin_upper_left; bool pixel_center_integer; + struct { + /** Global xfb_stride out qualifier if any */ + GLuint BufferStride[MAX_FEEDBACK_BUFFERS]; + } TransformFeedback; + /** * Tessellation Control shader state from layout qualifiers. 
 */ @@ -2674,6 +2679,8 @@ struct gl_shader_program */ struct { GLenum BufferMode; + /** Global xfb_stride out qualifier if any */ + GLuint BufferStride[MAX_FEEDBACK_BUFFERS]; GLuint NumVarying; GLchar **VaryingNames; /**< Array [NumVarying] of char * */ } TransformFeedback; -- cgit v1.2.3 From 047139e8a027d81141b6e0b1bc939942e873f3ce Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Thu, 10 Mar 2016 15:20:32 +1100 Subject: mesa: rename transform feedback varying macro XFB to XFV A later patch will use XFB for buffers. Reviewed-by: Dave Airlie --- src/mesa/main/shader_query.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 4967e4b1df1..247a26d4fda 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -60,7 +60,7 @@ DECL_RESOURCE_FUNC(VAR, gl_shader_variable); DECL_RESOURCE_FUNC(UBO, gl_uniform_block); DECL_RESOURCE_FUNC(UNI, gl_uniform_storage); DECL_RESOURCE_FUNC(ATC, gl_active_atomic_buffer); -DECL_RESOURCE_FUNC(XFB, gl_transform_feedback_varying_info); +DECL_RESOURCE_FUNC(XFV, gl_transform_feedback_varying_info); DECL_RESOURCE_FUNC(SUB, gl_subroutine_function); void GLAPIENTRY @@ -433,7 +433,7 @@ _mesa_program_resource_name(struct gl_program_resource *res) case GL_SHADER_STORAGE_BLOCK: return RESOURCE_UBO(res)->Name; case GL_TRANSFORM_FEEDBACK_VARYING: - return RESOURCE_XFB(res)->Name; + return RESOURCE_XFV(res)->Name; case GL_PROGRAM_INPUT: var = RESOURCE_VAR(res); /* Special case gl_VertexIDMESA -> gl_VertexID. */ @@ -473,8 +473,8 @@ _mesa_program_resource_array_size(struct gl_program_resource *res) { switch (res->Type) { case GL_TRANSFORM_FEEDBACK_VARYING: - return RESOURCE_XFB(res)->Size > 1 ? 
+ RESOURCE_XFV(res)->Size : 0; case GL_PROGRAM_INPUT: case GL_PROGRAM_OUTPUT: return RESOURCE_VAR(res)->type->length; @@ -1157,7 +1157,7 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, *val = RESOURCE_VAR(res)->type->gl_type; return 1; case GL_TRANSFORM_FEEDBACK_VARYING: - *val = RESOURCE_XFB(res)->Type; + *val = RESOURCE_XFV(res)->Type; return 1; default: goto invalid_operation; @@ -1180,7 +1180,7 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, *val = MAX2(_mesa_program_resource_array_size(res), 1); return 1; case GL_TRANSFORM_FEEDBACK_VARYING: - *val = MAX2(RESOURCE_XFB(res)->Size, 1); + *val = MAX2(RESOURCE_XFV(res)->Size, 1); return 1; default: goto invalid_operation; -- cgit v1.2.3 From 51142e7705a5e0c28de9fc097fa7c8446ba0cffe Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Thu, 10 Mar 2016 11:40:37 +1100 Subject: mesa: add support to query GL_OFFSET for GL_TRANSFORM_FEEDBACK_VARYING Reviewed-by: Dave Airlie --- src/compiler/glsl/link_varyings.cpp | 1 + src/mesa/main/mtypes.h | 1 + src/mesa/main/shader_query.cpp | 14 +++++++++++--- 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index d486b691f58..b091eaf5512 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -742,6 +742,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, } else { xfb_offset = info->Buffers[buffer].Stride; } + info->Varyings[info->NumVarying].Offset = xfb_offset * 4; unsigned location = this->location; unsigned location_frac = this->location_frac; diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 1e15b69ab70..320297cdfe9 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1619,6 +1619,7 @@ struct gl_transform_feedback_varying_info char *Name; GLenum Type; GLint Size; + GLint Offset; }; diff --git a/src/mesa/main/shader_query.cpp 
b/src/mesa/main/shader_query.cpp index 247a26d4fda..15cc49afbfc 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -1186,9 +1186,17 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, goto invalid_operation; } case GL_OFFSET: - VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE); - *val = RESOURCE_UNI(res)->offset; - return 1; + switch (res->Type) { + case GL_UNIFORM: + case GL_BUFFER_VARIABLE: + *val = RESOURCE_UNI(res)->offset; + return 1; + case GL_TRANSFORM_FEEDBACK_VARYING: + *val = RESOURCE_XFV(res)->Offset; + return 1; + default: + goto invalid_operation; + } case GL_BLOCK_INDEX: VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE); *val = RESOURCE_UNI(res)->block_index; -- cgit v1.2.3 From 9e317271d7694d912da99e524294156b6c2de96e Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Thu, 10 Mar 2016 15:57:19 +1100 Subject: mesa: add support to query GL_TRANSFORM_FEEDBACK_BUFFER_INDEX Reviewed-by: Dave Airlie --- src/compiler/glsl/link_varyings.cpp | 15 +++++++++------ src/compiler/glsl/link_varyings.h | 4 ++-- src/mesa/main/mtypes.h | 1 + src/mesa/main/shader_query.cpp | 6 ++++++ 4 files changed, 18 insertions(+), 8 deletions(-) (limited to 'src/mesa') diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index b091eaf5512..a3b7e1c65c7 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -725,8 +725,9 @@ tfeedback_decl::get_num_outputs() const bool tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, struct gl_transform_feedback_info *info, - unsigned buffer, const unsigned max_outputs, - bool *explicit_stride, bool has_xfb_qualifiers) const + unsigned buffer, unsigned buffer_index, + const unsigned max_outputs, bool *explicit_stride, + bool has_xfb_qualifiers) const { assert(!this->next_buffer_separator); @@ -823,6 +824,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, this->orig_name); 
info->Varyings[info->NumVarying].Type = this->type; info->Varyings[info->NumVarying].Size = this->size; + info->Varyings[info->NumVarying].BufferIndex = buffer_index; info->NumVarying++; return true; @@ -976,8 +978,8 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, /* GL_SEPARATE_ATTRIBS */ for (unsigned i = 0; i < num_tfeedback_decls; ++i) { if (!tfeedback_decls[i].store(ctx, prog, &prog->LinkedTransformFeedback, - num_buffers, num_outputs, NULL, - has_xfb_qualifiers)) + num_buffers, num_buffers, num_outputs, + NULL, has_xfb_qualifiers)) return false; buffers |= 1 << num_buffers; @@ -1008,6 +1010,7 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, buffer != tfeedback_decls[i].get_buffer()) { /* we have moved to the next buffer so reset stream id */ buffer_stream_id = -1; + num_buffers++; } if (tfeedback_decls[i].is_next_buffer_separator()) { @@ -1036,11 +1039,11 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, } else { buffer = num_buffers; } - buffers |= 1 << num_buffers; + buffers |= 1 << buffer; if (!tfeedback_decls[i].store(ctx, prog, &prog->LinkedTransformFeedback, - num_buffers, num_outputs, + buffer, num_buffers, num_outputs, explicit_stride, has_xfb_qualifiers)) return false; } diff --git a/src/compiler/glsl/link_varyings.h b/src/compiler/glsl/link_varyings.h index 9ea79f04fa8..543b80ff29b 100644 --- a/src/compiler/glsl/link_varyings.h +++ b/src/compiler/glsl/link_varyings.h @@ -98,8 +98,8 @@ public: unsigned get_num_outputs() const; bool store(struct gl_context *ctx, struct gl_shader_program *prog, struct gl_transform_feedback_info *info, unsigned buffer, - const unsigned max_outputs, bool *explicit_stride, - bool has_xfb_qualifiers) const; + unsigned buffer_index, const unsigned max_outputs, + bool *explicit_stride, bool has_xfb_qualifiers) const; const tfeedback_candidate *find_candidate(gl_shader_program *prog, hash_table *tfeedback_candidates); diff --git 
a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 320297cdfe9..33923d195c9 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1618,6 +1618,7 @@ struct gl_transform_feedback_varying_info { char *Name; GLenum Type; + GLint BufferIndex; GLint Size; GLint Offset; }; diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 15cc49afbfc..ee2eeab0f8d 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -1322,6 +1322,12 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, default: goto invalid_operation; } + + case GL_TRANSFORM_FEEDBACK_BUFFER_INDEX: + VALIDATE_TYPE(GL_TRANSFORM_FEEDBACK_VARYING); + *val = RESOURCE_XFV(res)->BufferIndex; + return 1; + default: goto invalid_enum; } -- cgit v1.2.3 From 7234be0338813c0acd5b700ea2f7f20f7a972e51 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Thu, 10 Mar 2016 15:04:02 +1100 Subject: glsl: add transform feedback buffers to resource list Reviewed-by: Dave Airlie --- src/compiler/glsl/linker.cpp | 14 +++++++++++++- src/compiler/glsl/program.h | 3 ++- src/mesa/drivers/dri/i965/brw_link.cpp | 2 +- src/mesa/program/ir_to_mesa.cpp | 2 +- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 5 files changed, 18 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index 4f191c5002d..510a22e5bd3 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -3852,7 +3852,8 @@ write_top_level_array_size_and_stride: * resource data. */ void -build_program_resource_list(struct gl_shader_program *shProg) +build_program_resource_list(struct gl_context *ctx, + struct gl_shader_program *shProg) { /* Rebuild resource list. */ if (shProg->ProgramResourceList) { @@ -3910,6 +3911,17 @@ build_program_resource_list(struct gl_shader_program *shProg) } } + /* Add transform feedback buffers. 
*/ + for (unsigned i = 0; i < ctx->Const.MaxTransformFeedbackBuffers; i++) { + if ((shProg->LinkedTransformFeedback.ActiveBuffers >> i) & 1) { + shProg->LinkedTransformFeedback.Buffers[i].Binding = i; + if (!add_program_resource(shProg, GL_TRANSFORM_FEEDBACK_BUFFER, + &shProg->LinkedTransformFeedback.Buffers[i], + 0)) + return; + } + } + /* Add uniforms from uniform storage. */ for (unsigned i = 0; i < shProg->NumUniformStorage; i++) { /* Do not add uniforms internally used by Mesa. */ diff --git a/src/compiler/glsl/program.h b/src/compiler/glsl/program.h index 31bb9aa2435..8f5a31bd5ba 100644 --- a/src/compiler/glsl/program.h +++ b/src/compiler/glsl/program.h @@ -43,7 +43,8 @@ extern void link_shaders(struct gl_context *ctx, struct gl_shader_program *prog); extern void -build_program_resource_list(struct gl_shader_program *shProg); +build_program_resource_list(struct gl_context *ctx, + struct gl_shader_program *shProg); extern void linker_error(struct gl_shader_program *prog, const char *fmt, ...) 
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index b512f8b6ee1..c7d6fb8c79b 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -260,6 +260,6 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) if (brw->precompile && !brw_shader_precompile(ctx, shProg)) return false; - build_program_resource_list(shProg); + build_program_resource_list(ctx, shProg); return true; } diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 1d9047ee6fd..35a68562001 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2976,7 +2976,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) _mesa_reference_program(ctx, &linked_prog, NULL); } - build_program_resource_list(prog); + build_program_resource_list(ctx, prog); return prog->LinkStatus; } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5e18e8be029..23786b85529 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -6811,7 +6811,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) validate_ir_tree(ir); } - build_program_resource_list(prog); + build_program_resource_list(ctx, prog); for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { struct gl_program *linked_prog; -- cgit v1.2.3 From c5704bb350425162011367e47be8c69d424a8797 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Thu, 10 Mar 2016 16:17:13 +1100 Subject: mesa: add query support for GL_TRANSFORM_FEEDBACK_BUFFER interface Reviewed-by: Dave Airlie --- src/compiler/glsl/link_varyings.cpp | 1 + src/mesa/main/mtypes.h | 2 ++ src/mesa/main/program_resource.c | 16 +++++++++++++++- src/mesa/main/shader_query.cpp | 35 ++++++++++++++++++++++++++++++++++- 4 files changed, 52 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git 
a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index a3b7e1c65c7..848668c4381 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -826,6 +826,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, info->Varyings[info->NumVarying].Size = this->size; info->Varyings[info->NumVarying].BufferIndex = buffer_index; info->NumVarying++; + info->Buffers[buffer].NumVaryings++; return true; } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 33923d195c9..ff0707d03ba 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1650,6 +1650,8 @@ struct gl_transform_feedback_buffer { unsigned Binding; + unsigned NumVaryings; + /** * Total number of components stored in each buffer. This may be used by * hardware back-ends to determine the correct stride when interleaving diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c index 0d9f8aecf08..f2a9f006dd8 100644 --- a/src/mesa/main/program_resource.c +++ b/src/mesa/main/program_resource.c @@ -39,6 +39,7 @@ supported_interface_enum(struct gl_context *ctx, GLenum iface) case GL_UNIFORM_BLOCK: case GL_PROGRAM_INPUT: case GL_PROGRAM_OUTPUT: + case GL_TRANSFORM_FEEDBACK_BUFFER: case GL_TRANSFORM_FEEDBACK_VARYING: case GL_ATOMIC_COUNTER_BUFFER: case GL_BUFFER_VARIABLE: @@ -105,7 +106,8 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface, (*params)++; break; case GL_MAX_NAME_LENGTH: - if (programInterface == GL_ATOMIC_COUNTER_BUFFER) { + if (programInterface == GL_ATOMIC_COUNTER_BUFFER || + programInterface == GL_TRANSFORM_FEEDBACK_BUFFER) { _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramInterfaceiv(%s pname %s)", _mesa_enum_to_string(programInterface), @@ -165,6 +167,16 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface, } } break; + case GL_TRANSFORM_FEEDBACK_BUFFER: + for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) { + if 
(shProg->ProgramResourceList[i].Type == programInterface) { + struct gl_transform_feedback_buffer *buffer = + (struct gl_transform_feedback_buffer *) + shProg->ProgramResourceList[i].Data; + *params = MAX2(*params, buffer->NumVaryings); + } + } + break; default: _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramInterfaceiv(%s pname %s)", @@ -289,6 +301,7 @@ _mesa_GetProgramResourceIndex(GLuint program, GLenum programInterface, return _mesa_program_resource_index(shProg, res); case GL_ATOMIC_COUNTER_BUFFER: + case GL_TRANSFORM_FEEDBACK_BUFFER: default: _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceIndex(%s)", _mesa_enum_to_string(programInterface)); @@ -318,6 +331,7 @@ _mesa_GetProgramResourceName(GLuint program, GLenum programInterface, return; if (programInterface == GL_ATOMIC_COUNTER_BUFFER || + programInterface == GL_TRANSFORM_FEEDBACK_BUFFER || !supported_interface_enum(ctx, programInterface)) { _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceName(%s)", _mesa_enum_to_string(programInterface)); diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index ee2eeab0f8d..993dc863220 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -61,6 +61,7 @@ DECL_RESOURCE_FUNC(UBO, gl_uniform_block); DECL_RESOURCE_FUNC(UNI, gl_uniform_storage); DECL_RESOURCE_FUNC(ATC, gl_active_atomic_buffer); DECL_RESOURCE_FUNC(XFV, gl_transform_feedback_varying_info); +DECL_RESOURCE_FUNC(XFB, gl_transform_feedback_buffer); DECL_RESOURCE_FUNC(SUB, gl_subroutine_function); void GLAPIENTRY @@ -670,6 +671,7 @@ _mesa_program_resource_index(struct gl_shader_program *shProg, return RESOURCE_SUB(res)->index; case GL_UNIFORM_BLOCK: case GL_SHADER_STORAGE_BLOCK: + case GL_TRANSFORM_FEEDBACK_BUFFER: case GL_TRANSFORM_FEEDBACK_VARYING: default: return calc_resource_index(shProg, res); @@ -707,6 +709,7 @@ _mesa_program_resource_find_index(struct gl_shader_program *shProg, case GL_UNIFORM_BLOCK: case GL_ATOMIC_COUNTER_BUFFER: case 
GL_SHADER_STORAGE_BLOCK: + case GL_TRANSFORM_FEEDBACK_BUFFER: if (_mesa_program_resource_index(shProg, res) == index) return res; break; @@ -1009,7 +1012,8 @@ get_buffer_property(struct gl_shader_program *shProg, GET_CURRENT_CONTEXT(ctx); if (res->Type != GL_UNIFORM_BLOCK && res->Type != GL_ATOMIC_COUNTER_BUFFER && - res->Type != GL_SHADER_STORAGE_BLOCK) + res->Type != GL_SHADER_STORAGE_BLOCK && + res->Type != GL_TRANSFORM_FEEDBACK_BUFFER) goto invalid_operation; if (res->Type == GL_UNIFORM_BLOCK) { @@ -1110,6 +1114,30 @@ get_buffer_property(struct gl_shader_program *shProg, } return RESOURCE_ATC(res)->NumUniforms; } + } else if (res->Type == GL_TRANSFORM_FEEDBACK_BUFFER) { + switch (prop) { + case GL_BUFFER_BINDING: + *val = RESOURCE_XFB(res)->Binding; + return 1; + case GL_NUM_ACTIVE_VARIABLES: + *val = RESOURCE_XFB(res)->NumVaryings; + return 1; + case GL_ACTIVE_VARIABLES: + int i = 0; + for ( ; i < shProg->LinkedTransformFeedback.NumVarying; i++) { + unsigned index = + shProg->LinkedTransformFeedback.Varyings[i].BufferIndex; + struct gl_program_resource *buf_res = + _mesa_program_resource_find_index(shProg, + GL_TRANSFORM_FEEDBACK_BUFFER, + index); + assert(buf_res); + if (res == buf_res) { + *val++ = i; + } + } + return RESOURCE_XFB(res)->NumVaryings; + } } assert(!"support for property type not implemented"); @@ -1140,6 +1168,7 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, case GL_NAME_LENGTH: switch (res->Type) { case GL_ATOMIC_COUNTER_BUFFER: + case GL_TRANSFORM_FEEDBACK_BUFFER: goto invalid_operation; default: /* Resource name length + terminator. 
 */ @@ -1327,6 +1356,10 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, VALIDATE_TYPE(GL_TRANSFORM_FEEDBACK_VARYING); *val = RESOURCE_XFV(res)->BufferIndex; return 1; + case GL_TRANSFORM_FEEDBACK_BUFFER_STRIDE: + VALIDATE_TYPE(GL_TRANSFORM_FEEDBACK_BUFFER); + *val = RESOURCE_XFB(res)->Stride * 4; + return 1; default: goto invalid_enum; -- cgit v1.2.3 From 10b189f985755496a179b663d4b0746f3717093b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 31 Mar 2016 12:35:25 +1000 Subject: st/mesa: fix fallout from xfb changes. Failed to update state tracker with new buffer interface. Reviewed-by: Timothy Arceri Signed-off-by: Dave Airlie --- src/mesa/state_tracker/st_cb_xformfb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_cb_xformfb.c b/src/mesa/state_tracker/st_cb_xformfb.c index 0c01cd5ab78..a5cf3dfd5a9 100644 --- a/src/mesa/state_tracker/st_cb_xformfb.c +++ b/src/mesa/state_tracker/st_cb_xformfb.c @@ -125,7 +125,7 @@ st_begin_transform_feedback(struct gl_context *ctx, GLenum mode, if (bo && bo->buffer) { unsigned stream = - obj->shader_program->LinkedTransformFeedback.BufferStream[i]; + obj->shader_program->LinkedTransformFeedback.Buffers[i].Stream; /* Check whether we need to recreate the target. */ if (!sobj->targets[i] || @@ -204,7 +204,7 @@ st_end_transform_feedback(struct gl_context *ctx, for (i = 0; i < ARRAY_SIZE(sobj->targets); i++) { unsigned stream = - obj->shader_program->LinkedTransformFeedback.BufferStream[i]; + obj->shader_program->LinkedTransformFeedback.Buffers[i].Stream; /* Is it not bound or already set for this stream? */ if (!sobj->targets[i] || sobj->draw_count[stream]) -- cgit v1.2.3 From 65bc94022b0fd31d01c8de65f7f1115a86baa65a Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Sun, 14 Feb 2016 23:23:53 -0800 Subject: i965: Remove incorrect cycle estimates. These printed the cycle count of the last basic block (sched.time is set per basic block!). 
We have accurate, full program, data printed elsewhere. Reviewed-by: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 5b54b51395c..51d9ce1a5d2 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -1674,11 +1674,6 @@ fs_visitor::schedule_instructions(instruction_scheduler_mode mode) cfg->num_blocks, mode); sched.run(cfg); - if (unlikely(debug_enabled) && mode == SCHEDULE_POST) { - fprintf(stderr, "%s%d estimated execution time: %d cycles\n", - stage_abbrev, dispatch_width, sched.time); - } - invalidate_live_intervals(); } @@ -1688,10 +1683,5 @@ vec4_visitor::opt_schedule_instructions() vec4_instruction_scheduler sched(this, prog_data->total_grf); sched.run(cfg); - if (unlikely(debug_enabled)) { - fprintf(stderr, "%s estimated execution time: %d cycles\n", - stage_abbrev, sched.time); - } - invalidate_live_intervals(); } -- cgit v1.2.3 From 0d253ce34a8074a6c880d405c5bf860766afe358 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Sun, 14 Feb 2016 23:21:03 -0800 Subject: i965: Simplify full scheduling-barrier conditions. All of these were simply code for "architecture register file" (and in the case of destinations, "not the null register"). 
Reviewed-by: Francisco Jerez --- .../drivers/dri/i965/brw_schedule_instructions.cpp | 35 +++++----------------- 1 file changed, 8 insertions(+), 27 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 51d9ce1a5d2..2153898ef29 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -964,10 +964,7 @@ fs_instruction_scheduler::calculate_deps() } } else if (inst->src[i].is_accumulator()) { add_dep(last_accumulator_write, n); - } else if (inst->src[i].file != BAD_FILE && - inst->src[i].file != IMM && - inst->src[i].file != UNIFORM) { - assert(inst->src[i].file != MRF); + } else if (inst->src[i].file == ARF) { add_barrier_deps(n); } } @@ -1026,8 +1023,7 @@ fs_instruction_scheduler::calculate_deps() } else if (inst->dst.is_accumulator()) { add_dep(last_accumulator_write, n); last_accumulator_write = n; - } else if (inst->dst.file != BAD_FILE && - !inst->dst.is_null()) { + } else if (inst->dst.file == ARF && !inst->dst.is_null()) { add_barrier_deps(n); } @@ -1080,10 +1076,7 @@ fs_instruction_scheduler::calculate_deps() } } else if (inst->src[i].is_accumulator()) { add_dep(n, last_accumulator_write, 0); - } else if (inst->src[i].file != BAD_FILE && - inst->src[i].file != IMM && - inst->src[i].file != UNIFORM) { - assert(inst->src[i].file != MRF); + } else if (inst->src[i].file == ARF) { add_barrier_deps(n); } } @@ -1140,8 +1133,7 @@ fs_instruction_scheduler::calculate_deps() } } else if (inst->dst.is_accumulator()) { last_accumulator_write = n; - } else if (inst->dst.file != BAD_FILE && - !inst->dst.is_null()) { + } else if (inst->dst.file == ARF && !inst->dst.is_null()) { add_barrier_deps(n); } @@ -1204,12 +1196,7 @@ vec4_instruction_scheduler::calculate_deps() } else if (inst->src[i].is_accumulator()) { assert(last_accumulator_write); add_dep(last_accumulator_write, n); - } else 
if (inst->src[i].file != BAD_FILE && - inst->src[i].file != IMM && - inst->src[i].file != UNIFORM) { - /* No reads from MRF, and ATTR is already translated away */ - assert(inst->src[i].file != MRF && - inst->src[i].file != ATTR); + } else if (inst->src[i].file == ARF) { add_barrier_deps(n); } } @@ -1248,8 +1235,7 @@ vec4_instruction_scheduler::calculate_deps() } else if (inst->dst.is_accumulator()) { add_dep(last_accumulator_write, n); last_accumulator_write = n; - } else if (inst->dst.file != BAD_FILE && - !inst->dst.is_null()) { + } else if (inst->dst.file == ARF && !inst->dst.is_null()) { add_barrier_deps(n); } @@ -1291,11 +1277,7 @@ vec4_instruction_scheduler::calculate_deps() add_dep(n, last_fixed_grf_write); } else if (inst->src[i].is_accumulator()) { add_dep(n, last_accumulator_write); - } else if (inst->src[i].file != BAD_FILE && - inst->src[i].file != IMM && - inst->src[i].file != UNIFORM) { - assert(inst->src[i].file != MRF && - inst->src[i].file != ATTR); + } else if (inst->src[i].file == ARF) { add_barrier_deps(n); } } @@ -1330,8 +1312,7 @@ vec4_instruction_scheduler::calculate_deps() last_fixed_grf_write = n; } else if (inst->dst.is_accumulator()) { last_accumulator_write = n; - } else if (inst->dst.file != BAD_FILE && - !inst->dst.is_null()) { + } else if (inst->dst.file == ARF && !inst->dst.is_null()) { add_barrier_deps(n); } -- cgit v1.2.3 From 436bdd7403bfa260ce6dedcbd0ba96df2ae40ce8 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Wed, 17 Feb 2016 11:04:50 -0800 Subject: Revert "i965: Don't add barrier deps for FB write messages." This reverts commit d0e1d6b7e27bf5f05436e47080d326d7daa63af2. The change in the vec4 code is a mistake -- there's never an FS_OPCODE_FB_WRITE in vec4 code. 
The change in the fs code had the (harmless) effect of not recognizing an FB_WRITE as a scheduling barrier even if it was marked EOT -- harmless because the scheduler marked the last instruction of a block as a barrier, something I'm changing in the following patches. This will be reimplemented later in the series. --- src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 2153898ef29..66eb07e6d1a 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -939,9 +939,8 @@ fs_instruction_scheduler::calculate_deps() foreach_in_list(schedule_node, n, &instructions) { fs_inst *inst = (fs_inst *)n->inst; - if ((inst->opcode == FS_OPCODE_PLACEHOLDER_HALT || - inst->has_side_effects()) && - inst->opcode != FS_OPCODE_FB_WRITE) + if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT || + inst->has_side_effects()) add_barrier_deps(n); /* read-after-write deps. */ @@ -1183,7 +1182,7 @@ vec4_instruction_scheduler::calculate_deps() foreach_in_list(schedule_node, n, &instructions) { vec4_instruction *inst = (vec4_instruction *)n->inst; - if (inst->has_side_effects() && inst->opcode != FS_OPCODE_FB_WRITE) + if (inst->has_side_effects()) add_barrier_deps(n); /* read-after-write deps. */ -- cgit v1.2.3 From f60750968c66f7aa15181c4ba315bb594e615044 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 14 Mar 2016 17:39:19 -0700 Subject: i965/vec4/tcs: Set conditional mod on TCS_OPCODE_SRC0_010_IS_ZERO. Missing this causes an assertion failure in the scheduler with the next patch. Additionally, this gives cmod propagation enough information to optimize code better. 
total instructions in shared programs: 7112991 -> 7112852 (-0.00%) instructions in affected programs: 25704 -> 25565 (-0.54%) helped: 139 total cycles in shared programs: 64812898 -> 64810674 (-0.00%) cycles in affected programs: 127224 -> 125000 (-1.75%) helped: 139 Acked-by: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 1 - src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 621c3321c81..8409e820f09 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1931,7 +1931,6 @@ generate_code(struct brw_codegen *p, case TCS_OPCODE_SRC0_010_IS_ZERO: /* If src_reg had stride like fs_reg, we wouldn't need this. */ brw_MOV(p, brw_null_reg(), stride(src[0], 0, 1, 0)); - brw_inst_set_cond_modifier(devinfo, brw_last_inst, BRW_CONDITIONAL_Z); break; case TCS_OPCODE_RELEASE_INPUT: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index 2046b94bca1..84aa89a7865 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -184,7 +184,9 @@ vec4_tcs_visitor::emit_thread_end() * we don't have stride in the vec4 world, nor UV immediates in * align16, so we need an opcode to get invocation_id<0,4,0>. 
*/ - emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(), invocation_id); + set_condmod(BRW_CONDITIONAL_Z, + emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(), + invocation_id)); emit(IF(BRW_PREDICATE_NORMAL)); for (unsigned i = 0; i < key->input_vertices; i += 2) { /* If we have an odd number of input vertices, the last will be -- cgit v1.2.3 From 7b208a731277b4b99b86af3df98c1219099036d7 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 15 Feb 2016 10:05:33 -0800 Subject: i965: Relax restriction on scheduling last instruction. I think when this code was written, basic blocks were always ended by a control flow instruction or an end-of-thread message. That's no longer the case, and removing this restriction actually helps things: instructions in affected programs: 7267 -> 7244 (-0.32%) helped: 4 total cycles in shared programs: 66559580 -> 66431900 (-0.19%) cycles in affected programs: 28310152 -> 28182472 (-0.45%) helped: 9577 HURT: 879 GAINED: 2 The addition of the is_control_flow() checks is not a functional change, since the add_insts_from_block() does not put them in the list of instructions to schedule. I plan to change this in a later patch. Reviewed-by: Francisco Jerez --- .../drivers/dri/i965/brw_schedule_instructions.cpp | 23 +++------------------- 1 file changed, 3 insertions(+), 20 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 66eb07e6d1a..46b45a5ea01 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -923,15 +923,6 @@ fs_instruction_scheduler::calculate_deps() */ schedule_node *last_fixed_grf_write = NULL; - /* The last instruction always needs to still be the last - * instruction. 
Either it's flow control (IF, ELSE, ENDIF, DO, - * WHILE) and scheduling other things after it would disturb the - * basic block, or it's FB_WRITE and we should do a better job at - * dead code elimination anyway. - */ - schedule_node *last = (schedule_node *)instructions.get_tail(); - add_barrier_deps(last); - memset(last_grf_write, 0, sizeof(last_grf_write)); memset(last_mrf_write, 0, sizeof(last_mrf_write)); @@ -940,7 +931,8 @@ fs_instruction_scheduler::calculate_deps() fs_inst *inst = (fs_inst *)n->inst; if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT || - inst->has_side_effects()) + inst->is_control_flow() || + inst->has_side_effects()) add_barrier_deps(n); /* read-after-write deps. */ @@ -1166,15 +1158,6 @@ vec4_instruction_scheduler::calculate_deps() */ schedule_node *last_fixed_grf_write = NULL; - /* The last instruction always needs to still be the last instruction. - * Either it's flow control (IF, ELSE, ENDIF, DO, WHILE) and scheduling - * other things after it would disturb the basic block, or it's the EOT - * URB_WRITE and we should do a better job at dead code eliminating - * anything that could have been scheduled after it. - */ - schedule_node *last = (schedule_node *)instructions.get_tail(); - add_barrier_deps(last); - memset(last_grf_write, 0, sizeof(last_grf_write)); memset(last_mrf_write, 0, sizeof(last_mrf_write)); @@ -1182,7 +1165,7 @@ vec4_instruction_scheduler::calculate_deps() foreach_in_list(schedule_node, n, &instructions) { vec4_instruction *inst = (vec4_instruction *)n->inst; - if (inst->has_side_effects()) + if (inst->is_control_flow() || inst->has_side_effects()) add_barrier_deps(n); /* read-after-write deps. */ -- cgit v1.2.3 From a607f4aa57def51236687ec17d7a6391fb147333 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 15 Feb 2016 10:42:14 -0800 Subject: i965: Assert that an instruction is not inserted around itself. 
Reviewed-by: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_shader.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 21977a23130..736deb443dd 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -948,6 +948,8 @@ adjust_later_block_ips(bblock_t *start_block, int ip_adjustment) void backend_instruction::insert_after(bblock_t *block, backend_instruction *inst) { + assert(this != inst); + if (!this->is_head_sentinel()) assert(inst_is_in_block(block, this) || !"Instruction not in block"); @@ -961,6 +963,8 @@ backend_instruction::insert_after(bblock_t *block, backend_instruction *inst) void backend_instruction::insert_before(bblock_t *block, backend_instruction *inst) { + assert(this != inst); + if (!this->is_tail_sentinel()) assert(inst_is_in_block(block, this) || !"Instruction not in block"); -- cgit v1.2.3 From b4e223cfbf4d46e2ca4c7313f4ebd52798d21551 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 15 Feb 2016 10:43:39 -0800 Subject: i965: Remove NOP insertion kludge in scheduler. Instead of removing every instruction in add_insts_from_block(), just move the instruction to its scheduled location. This is a step towards doing both bottom-up and top-down scheduling without conflicts. Note that this patch changes cycle counts for programs because it begins including control flow instructions in the estimates. 
Reviewed-by: Francisco Jerez --- .../drivers/dri/i965/brw_schedule_instructions.cpp | 25 +++++----------------- 1 file changed, 5 insertions(+), 20 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 46b45a5ea01..98fa5e3117f 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -783,26 +783,13 @@ schedule_node::schedule_node(backend_instruction *inst, void instruction_scheduler::add_insts_from_block(bblock_t *block) { - /* Removing the last instruction from a basic block removes the block as - * well, so put a NOP at the end to keep it alive. - */ - if (!block->end()->is_control_flow()) { - backend_instruction *nop = new(mem_ctx) backend_instruction(); - nop->opcode = BRW_OPCODE_NOP; - block->end()->insert_after(block, nop); - } - - foreach_inst_in_block_safe(backend_instruction, inst, block) { - if (inst->opcode == BRW_OPCODE_NOP || inst->is_control_flow()) - continue; - + foreach_inst_in_block(backend_instruction, inst, block) { schedule_node *n = new(mem_ctx) schedule_node(inst, this); - this->instructions_to_schedule++; - - inst->remove(block); instructions.push_tail(n); } + + this->instructions_to_schedule = block->end_ip - block->start_ip + 1; } /** Recursive computation of the delay member of a node. */ @@ -1463,7 +1450,6 @@ void instruction_scheduler::schedule_instructions(bblock_t *block) { const struct brw_device_info *devinfo = bs->devinfo; - backend_instruction *inst = block->end(); time = 0; if (!post_reg_alloc) reg_pressure = reg_pressure_in[block->num]; @@ -1482,7 +1468,8 @@ instruction_scheduler::schedule_instructions(bblock_t *block) /* Schedule this instruction. 
*/ assert(chosen); chosen->remove(); - inst->insert_before(block, chosen->inst); + chosen->inst->exec_node::remove(); + block->instructions.push_tail(chosen->inst); instructions_to_schedule--; if (!post_reg_alloc) { @@ -1551,8 +1538,6 @@ instruction_scheduler::schedule_instructions(bblock_t *block) } } - if (block->end()->opcode == BRW_OPCODE_NOP) - block->end()->remove(block); assert(instructions_to_schedule == 0); block->cycle_count = time; -- cgit v1.2.3 From 3495265158cce55d24a7a7f38a0a40d8c9448d38 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Sat, 12 Mar 2016 21:15:19 -0800 Subject: i965: Add and use is_scheduling_barrier() function. --- .../drivers/dri/i965/brw_schedule_instructions.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 98fa5e3117f..befa9ff3239 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -892,6 +892,14 @@ fs_instruction_scheduler::is_compressed(fs_inst *inst) return inst->exec_size == 16; } +static bool +is_scheduling_barrier(const fs_inst *inst) +{ + return inst->opcode == FS_OPCODE_PLACEHOLDER_HALT || + inst->is_control_flow() || + inst->has_side_effects(); +} + void fs_instruction_scheduler::calculate_deps() { @@ -917,9 +925,7 @@ fs_instruction_scheduler::calculate_deps() foreach_in_list(schedule_node, n, &instructions) { fs_inst *inst = (fs_inst *)n->inst; - if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT || - inst->is_control_flow() || - inst->has_side_effects()) + if (is_scheduling_barrier(inst)) add_barrier_deps(n); /* read-after-write deps. 
*/ @@ -1131,6 +1137,13 @@ fs_instruction_scheduler::calculate_deps() } } +static bool +is_scheduling_barrier(const vec4_instruction *inst) +{ + return inst->is_control_flow() || + inst->has_side_effects(); +} + void vec4_instruction_scheduler::calculate_deps() { @@ -1152,7 +1165,7 @@ vec4_instruction_scheduler::calculate_deps() foreach_in_list(schedule_node, n, &instructions) { vec4_instruction *inst = (vec4_instruction *)n->inst; - if (inst->is_control_flow() || inst->has_side_effects()) + if (is_scheduling_barrier(inst)) add_barrier_deps(n); /* read-after-write deps. */ -- cgit v1.2.3 From 4fea98991c8f94f14e469d4621eddc5247d4efbd Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Sat, 12 Mar 2016 21:16:03 -0800 Subject: i965: Don't add barrier deps for FB write messages. Ken did this earlier, and this is just me reimplementing his patch a little differently. Reviewed-by: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index befa9ff3239..8d925843732 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -897,7 +897,8 @@ is_scheduling_barrier(const fs_inst *inst) { return inst->opcode == FS_OPCODE_PLACEHOLDER_HALT || inst->is_control_flow() || - inst->has_side_effects(); + inst->eot || + (inst->has_side_effects() && inst->opcode != FS_OPCODE_FB_WRITE); } void -- cgit v1.2.3 From 6a8ca859f913cf56bc8abce6d1cde02b36a74289 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Tue, 16 Feb 2016 01:20:15 -0500 Subject: mesa: add OES_sample_variables to extension table, add enable bit Signed-off-by: Ilia Mirkin Reviewed-by: Dave Airlie Reviewed-by: Ian Romanick --- src/mesa/main/extensions_table.h | 1 + src/mesa/main/mtypes.h | 1 + 2 files changed, 2 insertions(+) (limited 
to 'src/mesa') diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index 7885aefcc28..700ef24ab22 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -330,6 +330,7 @@ EXT(OES_point_sprite , ARB_point_sprite EXT(OES_query_matrix , dummy_true , x , x , ES1, x , 2003) EXT(OES_read_format , dummy_true , GLL, GLC, ES1, x , 2003) EXT(OES_rgb8_rgba8 , dummy_true , x , x , ES1, ES2, 2005) +EXT(OES_sample_variables , OES_sample_variables , x , x , x , 30, 2014) EXT(OES_shader_image_atomic , ARB_shader_image_load_store , x , x , x , 31, 2015) EXT(OES_single_precision , dummy_true , x , x , ES1, x , 2003) EXT(OES_standard_derivatives , OES_standard_derivatives , x , x , x , ES2, 2005) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index ff0707d03ba..be8c21e08db 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3911,6 +3911,7 @@ struct gl_extensions GLboolean EXT_transform_feedback; GLboolean EXT_timer_query; GLboolean EXT_vertex_array_bgra; + GLboolean OES_sample_variables; GLboolean OES_standard_derivatives; GLboolean OES_texture_buffer; /* vendor extensions */ -- cgit v1.2.3 From 411a88accc8a2728abbdfbef4315addbc08cf5a3 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Tue, 16 Feb 2016 14:29:38 -0500 Subject: mesa: add GL_OES_sample_shading support Signed-off-by: Ilia Mirkin Reviewed-by: Dave Airlie Reviewed-by: Ian Romanick --- src/mapi/glapi/gen/es_EXT.xml | 6 ++++++ src/mesa/main/enable.c | 4 ++-- src/mesa/main/extensions_table.h | 1 + src/mesa/main/multisample.c | 3 ++- src/mesa/main/tests/dispatch_sanity.cpp | 3 +++ 5 files changed, 14 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index 8f8f997b20d..8e51c058321 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -798,6 +798,12 @@ + + + + + + diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index 
b90a60ba03f..d2830770ec2 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -807,7 +807,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) /* GL_ARB_sample_shading */ case GL_SAMPLE_SHADING: - if (!_mesa_is_desktop_gl(ctx)) + if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles3(ctx)) goto invalid_enum_error; CHECK_EXTENSION(ARB_sample_shading, cap); if (ctx->Multisample.SampleShading == state) @@ -1606,7 +1606,7 @@ _mesa_IsEnabled( GLenum cap ) /* ARB_sample_shading */ case GL_SAMPLE_SHADING: - if (!_mesa_is_desktop_gl(ctx)) + if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles3(ctx)) goto invalid_enum_error; CHECK_EXTENSION(ARB_sample_shading); return ctx->Multisample.SampleShading; diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index 700ef24ab22..c1bcfc53d10 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -330,6 +330,7 @@ EXT(OES_point_sprite , ARB_point_sprite EXT(OES_query_matrix , dummy_true , x , x , ES1, x , 2003) EXT(OES_read_format , dummy_true , GLL, GLC, ES1, x , 2003) EXT(OES_rgb8_rgba8 , dummy_true , x , x , ES1, ES2, 2005) +EXT(OES_sample_shading , OES_sample_variables , x , x , x , 30, 2014) EXT(OES_sample_variables , OES_sample_variables , x , x , x , 30, 2014) EXT(OES_shader_image_atomic , ARB_shader_image_load_store , x , x , x , 31, 2015) EXT(OES_single_precision , dummy_true , x , x , ES1, x , 2003) diff --git a/src/mesa/main/multisample.c b/src/mesa/main/multisample.c index 77773a20883..5453e38632e 100644 --- a/src/mesa/main/multisample.c +++ b/src/mesa/main/multisample.c @@ -127,7 +127,8 @@ _mesa_MinSampleShading(GLclampf value) { GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.ARB_sample_shading || !_mesa_is_desktop_gl(ctx)) { + if (!_mesa_has_ARB_sample_shading(ctx) && + !_mesa_has_OES_sample_shading(ctx)) { _mesa_error(ctx, GL_INVALID_OPERATION, "glMinSampleShading"); return; } diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
b/src/mesa/main/tests/dispatch_sanity.cpp index 309e574ac32..06e7ec1a195 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -2454,6 +2454,9 @@ const struct function gles3_functions_possible[] = { { "glTexBufferOES", 31, -1 }, { "glTexBufferRangeOES", 31, -1 }, + /* GL_OES_sample_shading */ + { "glMinSampleShadingOES", 30, -1 }, + { NULL, 0, -1 } }; -- cgit v1.2.3 From 3002296cb68ebc9705b29e024e5fc67d5565ed46 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 20 Feb 2016 15:03:55 -0500 Subject: mesa: add GL_OES_shader_multisample_interpolation support Signed-off-by: Ilia Mirkin Reviewed-by: Dave Airlie Reviewed-by: Ian Romanick --- src/compiler/glsl/builtin_functions.cpp | 12 +++++++----- src/compiler/glsl/glcpp/glcpp-parse.y | 4 +++- src/compiler/glsl/glsl_lexer.ll | 2 +- src/compiler/glsl/glsl_parser_extras.cpp | 1 + src/compiler/glsl/glsl_parser_extras.h | 2 ++ src/mesa/main/extensions_table.h | 1 + src/mesa/main/get.c | 5 +++++ src/mesa/main/get_hash_params.py | 11 ++++++++--- 8 files changed, 28 insertions(+), 10 deletions(-) (limited to 'src/mesa') diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index 62f07b2460e..65309fdc09c 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -264,10 +264,12 @@ shader_packing_or_es31_or_gpu_shader5(const _mesa_glsl_parse_state *state) } static bool -fs_gpu_shader5(const _mesa_glsl_parse_state *state) +fs_interpolate_at(const _mesa_glsl_parse_state *state) { return state->stage == MESA_SHADER_FRAGMENT && - (state->is_version(400, 0) || state->ARB_gpu_shader5_enable); + (state->is_version(400, 320) || + state->ARB_gpu_shader5_enable || + state->OES_shader_multisample_interpolation_enable); } @@ -5165,7 +5167,7 @@ builtin_builder::_interpolateAtCentroid(const glsl_type *type) { ir_variable *interpolant = in_var(type, "interpolant"); interpolant->data.must_be_shader_input = 1; - 
MAKE_SIG(type, fs_gpu_shader5, 1, interpolant); + MAKE_SIG(type, fs_interpolate_at, 1, interpolant); body.emit(ret(interpolate_at_centroid(interpolant))); @@ -5178,7 +5180,7 @@ builtin_builder::_interpolateAtOffset(const glsl_type *type) ir_variable *interpolant = in_var(type, "interpolant"); interpolant->data.must_be_shader_input = 1; ir_variable *offset = in_var(glsl_type::vec2_type, "offset"); - MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, offset); + MAKE_SIG(type, fs_interpolate_at, 2, interpolant, offset); body.emit(ret(interpolate_at_offset(interpolant, offset))); @@ -5191,7 +5193,7 @@ builtin_builder::_interpolateAtSample(const glsl_type *type) ir_variable *interpolant = in_var(type, "interpolant"); interpolant->data.must_be_shader_input = 1; ir_variable *sample_num = in_var(glsl_type::int_type, "sample_num"); - MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, sample_num); + MAKE_SIG(type, fs_interpolate_at, 2, interpolant, sample_num); body.emit(ret(interpolate_at_sample(interpolant, sample_num))); diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index 14d2c7d3eff..e8646c0ad32 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -2371,8 +2371,10 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio if (extensions != NULL) { if (extensions->OES_EGL_image_external) add_builtin_define(parser, "GL_OES_EGL_image_external", 1); - if (extensions->OES_sample_variables) + if (extensions->OES_sample_variables) { add_builtin_define(parser, "GL_OES_sample_variables", 1); + add_builtin_define(parser, "GL_OES_shader_multisample_interpolation", 1); + } if (extensions->OES_standard_derivatives) add_builtin_define(parser, "GL_OES_standard_derivatives", 1); if (extensions->ARB_texture_multisample) diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll index 5492045f7c3..0b7695f8d3e 100644 --- a/src/compiler/glsl/glsl_lexer.ll +++ 
b/src/compiler/glsl/glsl_lexer.ll @@ -584,7 +584,7 @@ usamplerBuffer KEYWORD_WITH_ALT(140, 300, 140, 320, yyextra->EXT_texture_buffer_ /* Additional reserved words in GLSL ES 3.00 */ resource KEYWORD(0, 300, 0, 0, RESOURCE); -sample KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_gpu_shader5_enable, SAMPLE); +sample KEYWORD_WITH_ALT(400, 300, 400, 320, yyextra->ARB_gpu_shader5_enable || yyextra->OES_shader_multisample_interpolation_enable, SAMPLE); subroutine KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_shader_subroutine_enable, SUBROUTINE); diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index def86e18223..3dc68741902 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -617,6 +617,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(OES_gpu_shader5, false, true, ARB_gpu_shader5), EXT(OES_sample_variables, false, true, OES_sample_variables), EXT(OES_shader_image_atomic, false, true, ARB_shader_image_load_store), + EXT(OES_shader_multisample_interpolation, false, true, OES_sample_variables), EXT(OES_standard_derivatives, false, true, OES_standard_derivatives), EXT(OES_texture_3D, false, true, dummy_true), EXT(OES_texture_buffer, false, true, OES_texture_buffer), diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h index 219fb427c0f..0cc2d259f3a 100644 --- a/src/compiler/glsl/glsl_parser_extras.h +++ b/src/compiler/glsl/glsl_parser_extras.h @@ -608,6 +608,8 @@ struct _mesa_glsl_parse_state { bool OES_sample_variables_warn; bool OES_shader_image_atomic_enable; bool OES_shader_image_atomic_warn; + bool OES_shader_multisample_interpolation_enable; + bool OES_shader_multisample_interpolation_warn; bool OES_standard_derivatives_enable; bool OES_standard_derivatives_warn; bool OES_texture_3D_enable; diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index 
c1bcfc53d10..84401fd501b 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -333,6 +333,7 @@ EXT(OES_rgb8_rgba8 , dummy_true EXT(OES_sample_shading , OES_sample_variables , x , x , x , 30, 2014) EXT(OES_sample_variables , OES_sample_variables , x , x , x , 30, 2014) EXT(OES_shader_image_atomic , ARB_shader_image_load_store , x , x , x , 31, 2015) +EXT(OES_shader_multisample_interpolation , OES_sample_variables , x , x , x , 30, 2014) EXT(OES_single_precision , dummy_true , x , x , ES1, x , 2003) EXT(OES_standard_derivatives , OES_standard_derivatives , x , x , x , ES2, 2005) EXT(OES_stencil1 , dummy_false , x , x , x , x , 2005) diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 88efd3ee642..6829c33254c 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -408,6 +408,11 @@ static const int extra_ARB_gpu_shader5_or_oes_geometry_shader[] = { EXTRA_END }; +static const int extra_ARB_gpu_shader5_or_OES_sample_variables[] = { + EXT(ARB_gpu_shader5), + EXT(OES_sample_variables), +}; + EXTRA_EXT(ARB_texture_cube_map); EXTRA_EXT(EXT_texture_array); EXTRA_EXT(NV_fog_distance); diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 62968fc0300..7998d0366d3 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -503,6 +503,14 @@ descriptor=[ [ "MAX_COMBINED_SHADER_OUTPUT_RESOURCES", "CONTEXT_INT(Const.MaxCombinedShaderOutputResources), extra_ARB_shader_image_load_store_shader_storage_buffer_object_es31" ], ]}, +# Enums in OpenGL Core profile and ES 3.0 +{ "apis": ["GL_CORE", "GLES3"], "params": [ + # GL_ARB_gpu_shader5 / GL_OES_shader_multisample_interpolation + [ "MIN_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MinFragmentInterpolationOffset), extra_ARB_gpu_shader5_or_OES_sample_variables" ], + [ "MAX_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MaxFragmentInterpolationOffset), extra_ARB_gpu_shader5_or_OES_sample_variables" ], + [ 
"FRAGMENT_INTERPOLATION_OFFSET_BITS", "CONST(FRAGMENT_INTERPOLATION_OFFSET_BITS), extra_ARB_gpu_shader5_or_OES_sample_variables" ], +]}, + # Enums in OpenGL Core profile and ES 3.1 { "apis": ["GL_CORE", "GLES31"], "params": [ # GL_ARB_draw_indirect / GLES 3.1 @@ -882,9 +890,6 @@ descriptor=[ # GL_ARB_gpu_shader5 [ "MAX_GEOMETRY_SHADER_INVOCATIONS", "CONST(MAX_GEOMETRY_SHADER_INVOCATIONS), extra_ARB_gpu_shader5" ], - [ "MIN_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MinFragmentInterpolationOffset), extra_ARB_gpu_shader5" ], - [ "MAX_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MaxFragmentInterpolationOffset), extra_ARB_gpu_shader5" ], - [ "FRAGMENT_INTERPOLATION_OFFSET_BITS", "CONST(FRAGMENT_INTERPOLATION_OFFSET_BITS), extra_ARB_gpu_shader5" ], # GL_ARB_tessellation_shader [ "PATCH_VERTICES", "CONTEXT_INT(TessCtrlProgram.patch_vertices), extra_ARB_tessellation_shader" ], -- cgit v1.2.3 From 2c7f5fe2960362b266aeb8e1ed0ebea762131df5 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Tue, 16 Feb 2016 01:27:27 -0500 Subject: st/mesa: add ES sample-shading support We require the full ARB_gpu_shader5 for now, but in the future some other CAP could get exposed to indicate that only the multisample-related behavior of ARB_gpu_shader5 is available. 
Signed-off-by: Ilia Mirkin Reviewed-by: Dave Airlie --- docs/GL3.txt | 6 +++--- src/mesa/state_tracker/st_extensions.c | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/docs/GL3.txt b/docs/GL3.txt index f6248daa1b6..489b8219351 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -262,11 +262,11 @@ GLES3.2, GLSL ES 3.2 GL_OES_geometry_shader started (Marta) GL_OES_gpu_shader5 DONE (all drivers that support GL_ARB_gpu_shader5) GL_OES_primitive_bounding box not started - GL_OES_sample_shading not started (based on parts of GL_ARB_sample_shading, which is done for some drivers) - GL_OES_sample_variables not started (based on parts of GL_ARB_sample_shading, which is done for some drivers) + GL_OES_sample_shading DONE (nvc0, r600, radeonsi) + GL_OES_sample_variables DONE (nvc0, r600, radeonsi) GL_OES_shader_image_atomic DONE (all drivers that support GL_ARB_shader_image_load_store) GL_OES_shader_io_blocks not started (based on parts of GLSL 1.50, which is done) - GL_OES_shader_multisample_interpolation not started (based on parts of GL_ARB_gpu_shader5, which is done) + GL_OES_shader_multisample_interpolation DONE (nvc0, r600, radeonsi) GL_OES_tessellation_shader not started (based on GL_ARB_tessellation_shader, which is done for some drivers) GL_OES_texture_border_clamp DONE (all drivers) GL_OES_texture_buffer DONE (core only) diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 36a12010c23..0a25770aa02 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -921,6 +921,12 @@ void st_init_extensions(struct pipe_screen *screen, extensions->ARB_sync = GL_TRUE; } + /* Needs PIPE_CAP_SAMPLE_SHADING + all the sample-related bits of + * ARB_gpu_shader5. This enables all the per-sample shading ES extensions. + */ + extensions->OES_sample_variables = extensions->ARB_sample_shading && + extensions->ARB_gpu_shader5; + /* Maximum sample count. 
*/ { enum pipe_format color_formats[] = { -- cgit v1.2.3 From 571f538a622d9a7050015b58b7b1ac240f289dcb Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Wed, 30 Mar 2016 22:03:06 -0400 Subject: mesa: remove duplicate MAX_GEOMETRY_SHADER_INVOCATIONS entry Signed-off-by: Ilia Mirkin Reviewed-by: Dave Airlie --- src/mesa/main/get_hash_params.py | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 7998d0366d3..a0cc4f8e842 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -888,9 +888,6 @@ descriptor=[ [ "VIEWPORT_BOUNDS_RANGE", "CONTEXT_FLOAT2(Const.ViewportBounds), extra_ARB_viewport_array" ], [ "VIEWPORT_INDEX_PROVOKING_VERTEX", "CONTEXT_ENUM(Const.LayerAndVPIndexProvokingVertex), extra_ARB_viewport_array" ], -# GL_ARB_gpu_shader5 - [ "MAX_GEOMETRY_SHADER_INVOCATIONS", "CONST(MAX_GEOMETRY_SHADER_INVOCATIONS), extra_ARB_gpu_shader5" ], - # GL_ARB_tessellation_shader [ "PATCH_VERTICES", "CONTEXT_INT(TessCtrlProgram.patch_vertices), extra_ARB_tessellation_shader" ], [ "PATCH_DEFAULT_OUTER_LEVEL", "CONTEXT_FLOAT4(TessCtrlProgram.patch_default_outer_level), extra_ARB_tessellation_shader" ], -- cgit v1.2.3 From ebdb5345480957c4fc3068fab17926be28d7dcd4 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 15 Feb 2016 20:34:52 -0500 Subject: mesa: add GL_OES_copy_image support Signed-off-by: Ilia Mirkin Reviewed-by: Dave Airlie --- docs/GL3.txt | 2 +- src/mapi/glapi/gen/es_EXT.xml | 22 +++++++++ src/mesa/main/copyimage.c | 27 ++++++++++- src/mesa/main/extensions_table.h | 1 + src/mesa/main/mtypes.h | 1 + src/mesa/main/tests/dispatch_sanity.cpp | 3 ++ src/mesa/main/textureview.c | 86 +++++++++++++++++++++++++++++++++ src/mesa/state_tracker/st_extensions.c | 11 +++++ 8 files changed, 151 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/docs/GL3.txt b/docs/GL3.txt index 489b8219351..c76b1e28586 100644 --- a/docs/GL3.txt +++ 
b/docs/GL3.txt @@ -256,7 +256,7 @@ GLES3.2, GLSL ES 3.2 GL_KHR_debug DONE (all drivers) GL_KHR_robustness not started (90% done with the ARB variant) GL_KHR_texture_compression_astc_ldr DONE (i965/gen9+) - GL_OES_copy_image not started (based on GL_ARB_copy_image, which is done for some drivers) + GL_OES_copy_image DONE (core only) GL_OES_draw_buffers_indexed not started GL_OES_draw_elements_base_vertex DONE (all drivers) GL_OES_geometry_shader started (Marta) diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index 8e51c058321..149acd5b10c 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -1013,6 +1013,28 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/mesa/main/copyimage.c b/src/mesa/main/copyimage.c index d571d221bce..a0f1c691220 100644 --- a/src/mesa/main/copyimage.c +++ b/src/mesa/main/copyimage.c @@ -25,6 +25,7 @@ * Jason Ekstrand */ +#include "context.h" #include "glheader.h" #include "errors.h" #include "enums.h" @@ -360,8 +361,32 @@ compressed_format_compatible(const struct gl_context *ctx, case GL_COMPRESSED_SIGNED_RED_RGTC1: compressedClass = BLOCK_CLASS_64_BITS; break; + case GL_COMPRESSED_RGBA8_ETC2_EAC: + case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC: + case GL_COMPRESSED_RG11_EAC: + case GL_COMPRESSED_SIGNED_RG11_EAC: + if (_mesa_is_gles(ctx)) + compressedClass = BLOCK_CLASS_128_BITS; + else + return false; + break; + case GL_COMPRESSED_RGB8_ETC2: + case GL_COMPRESSED_SRGB8_ETC2: + case GL_COMPRESSED_R11_EAC: + case GL_COMPRESSED_SIGNED_R11_EAC: + case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: + case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: + if (_mesa_is_gles(ctx)) + compressedClass = BLOCK_CLASS_64_BITS; + else + return false; + break; default: - return false; + if (_mesa_is_gles(ctx) && _mesa_is_astc_format(compressedFormat)) + compressedClass = BLOCK_CLASS_128_BITS; + else + return false; + break; } switch (otherFormat) { diff --git a/src/mesa/main/extensions_table.h 
b/src/mesa/main/extensions_table.h index 84401fd501b..717efabfec9 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -309,6 +309,7 @@ EXT(OES_blend_subtract , dummy_true EXT(OES_byte_coordinates , dummy_true , x , x , ES1, x , 2002) EXT(OES_compressed_ETC1_RGB8_texture , OES_compressed_ETC1_RGB8_texture , x , x , ES1, ES2, 2005) EXT(OES_compressed_paletted_texture , dummy_true , x , x , ES1, x , 2003) +EXT(OES_copy_image , OES_copy_image , x , x , x , 30, 2014) EXT(OES_depth24 , dummy_true , x , x , ES1, ES2, 2005) EXT(OES_depth32 , dummy_false , x , x , x , x , 2005) EXT(OES_depth_texture , ARB_depth_texture , x , x , x , ES2, 2006) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index be8c21e08db..f2cb4cb107b 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3911,6 +3911,7 @@ struct gl_extensions GLboolean EXT_transform_feedback; GLboolean EXT_timer_query; GLboolean EXT_vertex_array_bgra; + GLboolean OES_copy_image; GLboolean OES_sample_variables; GLboolean OES_standard_derivatives; GLboolean OES_texture_buffer; diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 06e7ec1a195..c3aa7106b33 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -2457,6 +2457,9 @@ const struct function gles3_functions_possible[] = { /* GL_OES_sample_shading */ { "glMinSampleShadingOES", 30, -1 }, + /* GL_OES_copy_image */ + { "glCopyImageSubDataOES", 30, -1 }, + { NULL, 0, -1 } }; diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c index 419fbebf2f0..4b3b3245c2e 100644 --- a/src/mesa/main/textureview.c +++ b/src/mesa/main/textureview.c @@ -82,6 +82,39 @@ | | COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT | --------------------------------------------------------------------------- */ + +#define VIEW_CLASS_GLES(x) (GL_VIEW_CLASS_BPTC_FLOAT + 1 + x) +#define VIEW_CLASS_EAC_R11 VIEW_CLASS_GLES(0) +#define 
VIEW_CLASS_EAC_RG11 VIEW_CLASS_GLES(1) +#define VIEW_CLASS_ETC2_RGB VIEW_CLASS_GLES(2) +#define VIEW_CLASS_ETC2_RGBA VIEW_CLASS_GLES(3) +#define VIEW_CLASS_ETC2_EAC_RGBA VIEW_CLASS_GLES(4) +#define VIEW_CLASS_ASTC_4x4_RGBA VIEW_CLASS_GLES(5) +#define VIEW_CLASS_ASTC_5x4_RGBA VIEW_CLASS_GLES(6) +#define VIEW_CLASS_ASTC_5x5_RGBA VIEW_CLASS_GLES(7) +#define VIEW_CLASS_ASTC_6x5_RGBA VIEW_CLASS_GLES(8) +#define VIEW_CLASS_ASTC_6x6_RGBA VIEW_CLASS_GLES(9) +#define VIEW_CLASS_ASTC_8x5_RGBA VIEW_CLASS_GLES(10) +#define VIEW_CLASS_ASTC_8x6_RGBA VIEW_CLASS_GLES(11) +#define VIEW_CLASS_ASTC_8x8_RGBA VIEW_CLASS_GLES(12) +#define VIEW_CLASS_ASTC_10x5_RGBA VIEW_CLASS_GLES(13) +#define VIEW_CLASS_ASTC_10x6_RGBA VIEW_CLASS_GLES(14) +#define VIEW_CLASS_ASTC_10x8_RGBA VIEW_CLASS_GLES(15) +#define VIEW_CLASS_ASTC_10x10_RGBA VIEW_CLASS_GLES(16) +#define VIEW_CLASS_ASTC_12x10_RGBA VIEW_CLASS_GLES(17) +#define VIEW_CLASS_ASTC_12x12_RGBA VIEW_CLASS_GLES(18) +#define VIEW_CLASS_ASTC_3x3x3_RGBA VIEW_CLASS_GLES(19) +#define VIEW_CLASS_ASTC_4x3x3_RGBA VIEW_CLASS_GLES(20) +#define VIEW_CLASS_ASTC_4x4x3_RGBA VIEW_CLASS_GLES(21) +#define VIEW_CLASS_ASTC_4x4x4_RGBA VIEW_CLASS_GLES(22) +#define VIEW_CLASS_ASTC_5x4x4_RGBA VIEW_CLASS_GLES(23) +#define VIEW_CLASS_ASTC_5x5x4_RGBA VIEW_CLASS_GLES(24) +#define VIEW_CLASS_ASTC_5x5x5_RGBA VIEW_CLASS_GLES(25) +#define VIEW_CLASS_ASTC_6x5x5_RGBA VIEW_CLASS_GLES(26) +#define VIEW_CLASS_ASTC_6x6x5_RGBA VIEW_CLASS_GLES(27) +#define VIEW_CLASS_ASTC_6x6x6_RGBA VIEW_CLASS_GLES(28) + + struct internal_format_class_info { GLenum view_class; GLenum internal_format; @@ -162,6 +195,41 @@ static const struct internal_format_class_info s3tc_compatible_internal_formats[ {GL_VIEW_CLASS_S3TC_DXT5_RGBA, GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, }; +static const struct internal_format_class_info gles_etc2_compatible_internal_formats[] = { + {VIEW_CLASS_EAC_R11, GL_COMPRESSED_R11_EAC}, + {VIEW_CLASS_EAC_R11, GL_COMPRESSED_SIGNED_R11_EAC}, + {VIEW_CLASS_EAC_RG11, 
GL_COMPRESSED_RG11_EAC}, + {VIEW_CLASS_EAC_RG11, GL_COMPRESSED_SIGNED_RG11_EAC}, + {VIEW_CLASS_ETC2_RGB, GL_COMPRESSED_RGB8_ETC2}, + {VIEW_CLASS_ETC2_RGB, GL_COMPRESSED_SRGB8_ETC2}, + {VIEW_CLASS_ETC2_RGBA, GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2}, + {VIEW_CLASS_ETC2_RGBA, GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2}, + {VIEW_CLASS_ETC2_EAC_RGBA, GL_COMPRESSED_RGBA8_ETC2_EAC}, + {VIEW_CLASS_ETC2_EAC_RGBA, GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC}, +}; + +static const struct internal_format_class_info gles_astc_compatible_internal_formats[] = { +#define ASTC_FMT(size) \ + {VIEW_CLASS_ASTC_##size## _RGBA, GL_COMPRESSED_RGBA_ASTC_##size##_KHR}, \ + {VIEW_CLASS_ASTC_##size##_RGBA, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_##size##_KHR} + + ASTC_FMT(4x4), + ASTC_FMT(5x4), + ASTC_FMT(5x5), + ASTC_FMT(6x5), + ASTC_FMT(6x6), + ASTC_FMT(8x5), + ASTC_FMT(8x6), + ASTC_FMT(8x8), + ASTC_FMT(10x5), + ASTC_FMT(10x6), + ASTC_FMT(10x8), + ASTC_FMT(10x10), + ASTC_FMT(12x10), + ASTC_FMT(12x12), +#undef ASTC_FMT +}; + GLenum _mesa_texture_view_lookup_view_class(const struct gl_context *ctx, GLenum internalformat) { @@ -180,6 +248,24 @@ _mesa_texture_view_lookup_view_class(const struct gl_context *ctx, GLenum intern return s3tc_compatible_internal_formats[i].view_class; } } + + if (_mesa_is_gles3(ctx)) { + for (i = 0; i < ARRAY_SIZE(gles_etc2_compatible_internal_formats); i++) { + if (gles_etc2_compatible_internal_formats[i].internal_format + == internalformat) + return gles_etc2_compatible_internal_formats[i].view_class; + } + + if (ctx->Extensions.KHR_texture_compression_astc_ldr) { + for (i = 0; i < ARRAY_SIZE(gles_astc_compatible_internal_formats); i++) { + if (gles_astc_compatible_internal_formats[i].internal_format + == internalformat) + return gles_astc_compatible_internal_formats[i].view_class; + } + } + + /* FINISHME: Add 3D OES formats when supported */ + } return GL_FALSE; } diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 
0a25770aa02..8748ab5c876 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -927,6 +927,17 @@ void st_init_extensions(struct pipe_screen *screen, extensions->OES_sample_variables = extensions->ARB_sample_shading && extensions->ARB_gpu_shader5; + /* If we don't have native ETC2 support, we don't keep track of the + * original ETC2 data. This is necessary to be able to copy images between + * compatible view classes. + */ + if (extensions->ARB_copy_image && screen->is_format_supported( + screen, PIPE_FORMAT_ETC2_RGB8, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_SAMPLER_VIEW)) { + extensions->OES_copy_image = GL_TRUE; + } + /* Maximum sample count. */ { enum pipe_format color_formats[] = { -- cgit v1.2.3 From a94d8d51d7e426485a72e6cfd6185dee9df5e070 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Wed, 17 Feb 2016 13:27:14 -0500 Subject: mesa: add GL_EXT_copy_image support The extension is identical to GL_OES_copy_image. But dEQP has tests that want the EXT variant. 
Signed-off-by: Ilia Mirkin Reviewed-by: Dave Airlie --- src/mapi/glapi/gen/es_EXT.xml | 22 ++++++++++++++++++++++ src/mesa/main/extensions_table.h | 1 + 2 files changed, 23 insertions(+) (limited to 'src/mesa') diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index 149acd5b10c..471f6b6e565 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -915,6 +915,28 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index 717efabfec9..1b003609eee 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -186,6 +186,7 @@ EXT(EXT_blend_subtract , dummy_true EXT(EXT_buffer_storage , ARB_buffer_storage , x , x , x , 31, 2015) EXT(EXT_color_buffer_float , dummy_true , x , x , ES1, 30, 2013) EXT(EXT_compiled_vertex_array , dummy_true , GLL, x , x , x , 1996) +EXT(EXT_copy_image , OES_copy_image , x , x , x , 30, 2014) EXT(EXT_copy_texture , dummy_true , GLL, x , x , x , 1995) EXT(EXT_depth_bounds_test , EXT_depth_bounds_test , GLL, GLC, x , x , 2002) EXT(EXT_discard_framebuffer , dummy_true , x , x , ES1, ES2, 2009) -- cgit v1.2.3 From 8bb9c6ff7f2398e1a497a9b5413736f58205843c Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 28 Mar 2016 17:27:36 -0700 Subject: ptn: Silence unused parameter warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The KIL instruction doesn't have a destination, so ptn_kil never uses dest. 
program/prog_to_nir.c: In function ‘ptn_kil’: program/prog_to_nir.c:547:38: warning: unused parameter ‘dest’ [-Wunused-parameter] ptn_kil(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) ^ Signed-off-by: Ian Romanick Reviewed-by: Iago Toral Quiroga Reviewed-by: Kenneth Graunke --- src/mesa/program/prog_to_nir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index 16b79c94c84..ce25f6d14f6 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -545,7 +545,7 @@ ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) } static void -ptn_kil(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) +ptn_kil(nir_builder *b, nir_ssa_def **src) { nir_ssa_def *cmp = b->shader->options->native_integers ? nir_bany_inequal4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_int(b, 0)) : @@ -830,7 +830,7 @@ ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst) break; case OPCODE_KIL: - ptn_kil(b, dest, src); + ptn_kil(b, src); break; case OPCODE_CMP: -- cgit v1.2.3 From cdea12bf035117f7cae5db0d52f3050d81c50c37 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 24 Mar 2016 15:48:55 -0700 Subject: ptn: Fix all users of ptn_swizzle None of the callers actually wanted what it did. In ptn_xpd, you only ever want a vec3 swizzle. In ptn_tex, you want a swizzle that matches the number of required texture coordinates. 
shader-db results: G45: total instructions in shared programs: 4011240 -> 4010911 (-0.01%) instructions in affected programs: 59232 -> 58903 (-0.56%) helped: 114 HURT: 0 total cycles in shared programs: 84314194 -> 84313220 (-0.00%) cycles in affected programs: 779150 -> 778176 (-0.13%) helped: 110 HURT: 13 Ironlake: total instructions in shared programs: 6397262 -> 6396605 (-0.01%) instructions in affected programs: 117402 -> 116745 (-0.56%) helped: 227 HURT: 0 total cycles in shared programs: 128889798 -> 128888524 (-0.00%) cycles in affected programs: 1214644 -> 1213370 (-0.10%) helped: 179 HURT: 44 Sandy Bridge: total instructions in shared programs: 8467391 -> 8467384 (-0.00%) instructions in affected programs: 3107 -> 3100 (-0.23%) helped: 10 HURT: 6 total cycles in shared programs: 117580120 -> 117573448 (-0.01%) cycles in affected programs: 103158 -> 96486 (-6.47%) helped: 84 HURT: 11 Ivy Bridge: total instructions in shared programs: 7774255 -> 7774258 (0.00%) instructions in affected programs: 1677 -> 1680 (0.18%) helped: 8 HURT: 6 total cycles in shared programs: 65743828 -> 65739190 (-0.01%) cycles in affected programs: 89312 -> 84674 (-5.19%) helped: 78 HURT: 23 Haswell: total instructions in shared programs: 7107172 -> 7107150 (-0.00%) instructions in affected programs: 2048 -> 2026 (-1.07%) helped: 16 HURT: 0 total cycles in shared programs: 64653636 -> 64647486 (-0.01%) cycles in affected programs: 86836 -> 80686 (-7.08%) helped: 85 HURT: 17 Broadwell and Skylake: total instructions in shared programs: 8447529 -> 8447507 (-0.00%) instructions in affected programs: 2038 -> 2016 (-1.08%) helped: 16 HURT: 0 total cycles in shared programs: 66418670 -> 66413416 (-0.01%) cycles in affected programs: 90110 -> 84856 (-5.83%) helped: 83 HURT: 20 Signed-off-by: Ian Romanick Reviewed-by: Iago Toral Quiroga Reviewed-by: Kenneth Graunke --- src/mesa/program/prog_to_nir.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/mesa') 
diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index ce25f6d14f6..a6119ae4e7c 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -59,7 +59,6 @@ struct ptn_compile { #define SWIZ(X, Y, Z, W) \ (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W } -#define ptn_swizzle(b, src, x, y, z, w) nir_swizzle(b, src, SWIZ(x, y, z, w), 4, true) #define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true) static nir_ssa_def * @@ -491,11 +490,11 @@ ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) ptn_move_dest_masked(b, dest, nir_fsub(b, nir_fmul(b, - ptn_swizzle(b, src[0], Y, Z, X, X), - ptn_swizzle(b, src[1], Z, X, Y, X)), + nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3, true), + nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3, true)), nir_fmul(b, - ptn_swizzle(b, src[1], Y, Z, X, X), - ptn_swizzle(b, src[0], Z, X, Y, X))), + nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3, true), + nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3, true))), WRITEMASK_XYZ); ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W); } @@ -642,7 +641,8 @@ ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src, unsigned src_number = 0; instr->src[src_number].src = - nir_src_for_ssa(ptn_swizzle(b, src[0], X, Y, Z, W)); + nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W), + instr->coord_components, true)); instr->src[src_number].src_type = nir_tex_src_coord; src_number++; -- cgit v1.2.3 From 58d4751fa0c5a38069879e9f72047b75f8351d93 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 29 Mar 2016 09:58:11 -0700 Subject: i965: Fix textureSize() depth value for 1 layer surfaces on Gen4-6. According to the Sandybridge PRM's description of the resinfo message, the .z value returned will be Depth == 0 ? 0 : Depth + 1. The earlier PRMs have the same table. This means we return 0 for array textures with a single slice, when we ought to return 1. Just override it to max(depth, 1). 
Fixes 10 dEQP-GLES3.functional tests on Sandybridge: shaders.texture_functions.texturesize.sampler2darray_fixed_vertex shaders.texture_functions.texturesize.sampler2darray_fixed_fragment shaders.texture_functions.texturesize.sampler2darray_float_vertex shaders.texture_functions.texturesize.sampler2darray_float_fragment shaders.texture_functions.texturesize.isampler2darray_vertex shaders.texture_functions.texturesize.isampler2darray_fragment shaders.texture_functions.texturesize.usampler2darray_vertex shaders.texture_functions.texturesize.usampler2darray_fragment shaders.texture_functions.texturesize.sampler2darrayshadow_vertex shaders.texture_functions.texturesize.sampler2darrayshadow_fragment Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 ++++++++-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 ++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index dc61d096efc..4fbcf2bd105 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -228,10 +228,16 @@ fs_visitor::emit_texture(ir_texture_opcode op, } /* fixup #layers for cube map arrays */ - if (op == ir_txs && is_cube_array) { + if (op == ir_txs && (devinfo->gen < 7 || is_cube_array)) { fs_reg depth = offset(dst, bld, 2); fs_reg fixed_depth = vgrf(glsl_type::int_type); - bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, brw_imm_d(6)); + + if (is_cube_array) { + bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, brw_imm_d(6)); + } else if (devinfo->gen < 7) { + /* Gen4-6 return 0 instead of 1 for single layer surfaces. 
*/ + bld.emit_minmax(fixed_depth, depth, brw_imm_d(1), BRW_CONDITIONAL_GE); + } fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written); int components = inst->regs_written / (inst->exec_size / 8); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index d30330a379f..2ab141fdf21 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1056,10 +1056,16 @@ vec4_visitor::emit_texture(ir_texture_opcode op, /* fixup num layers (z) for cube arrays: hardware returns faces * layers; * spec requires layers. */ - if (op == ir_txs && is_cube_array) { - emit_math(SHADER_OPCODE_INT_QUOTIENT, - writemask(inst->dst, WRITEMASK_Z), - src_reg(inst->dst), brw_imm_d(6)); + if (op == ir_txs) { + if (is_cube_array) { + emit_math(SHADER_OPCODE_INT_QUOTIENT, + writemask(inst->dst, WRITEMASK_Z), + src_reg(inst->dst), brw_imm_d(6)); + } else if (devinfo->gen < 7) { + /* Gen4-6 return 0 instead of 1 for single layer surfaces. */ + emit_minmax(BRW_CONDITIONAL_GE, writemask(inst->dst, WRITEMASK_Z), + src_reg(inst->dst), brw_imm_d(1)); + } } if (devinfo->gen == 6 && op == ir_tg4) { -- cgit v1.2.3 From a57320a9ba4f453c567716bf8270c0ac629ad0d2 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 31 Mar 2016 00:53:21 -0700 Subject: i965: Use brw->urb.min_vs_urb_entries instead of 32 for BLORP. Haswell GT2 and GT3 have a minimum of 64 entries. Hardcoding 32 is not legal. v2: Delete stale comment (caught by Alejandro). 
Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Kenneth Graunke Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 89b73ca7519..eae1e30e150 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -55,11 +55,8 @@ gen7_blorp_emit_urb_config(struct brw_context *brw) 0 /* gs_size */, urb_size / 2 /* fs_size */); - /* The minimum valid number of VS entries is 32. See 3DSTATE_URB_VS, Dword - * 1.15:0 "VS Number of URB Entries". - */ gen7_emit_urb_state(brw, - 32 /* num_vs_entries */, + brw->urb.min_vs_entries /* num_vs_entries */, 2 /* vs_size */, 2 /* vs_start */, 0 /* num_hs_entries */, -- cgit v1.2.3 From e0e16830873b945a24880ae515466bf7f9165f42 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Thu, 31 Mar 2016 00:48:01 -0400 Subject: mesa: add GL_OES/EXT_draw_buffers_indexed support This is the same ext as ARB_draw_buffers_blend (plus some core functionality that already exists). Add the alias entrypoints. 
Signed-off-by: Ilia Mirkin Reviewed-by: Dave Airlie --- docs/GL3.txt | 2 +- docs/relnotes/11.3.0.html | 1 + src/mapi/glapi/gen/es_EXT.xml | 106 ++++++++++++++++++++++++++++++++ src/mesa/main/extensions_table.h | 2 + src/mesa/main/tests/dispatch_sanity.cpp | 10 +++ 5 files changed, 120 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/docs/GL3.txt b/docs/GL3.txt index c76b1e28586..5b6dc89e250 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -257,7 +257,7 @@ GLES3.2, GLSL ES 3.2 GL_KHR_robustness not started (90% done with the ARB variant) GL_KHR_texture_compression_astc_ldr DONE (i965/gen9+) GL_OES_copy_image DONE (core only) - GL_OES_draw_buffers_indexed not started + GL_OES_draw_buffers_indexed DONE (all drivers that support GL_ARB_draw_buffers_blend) GL_OES_draw_elements_base_vertex DONE (all drivers) GL_OES_geometry_shader started (Marta) GL_OES_gpu_shader5 DONE (all drivers that support GL_ARB_gpu_shader5) diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html index 4e23959e314..8aa9444c54c 100644 --- a/docs/relnotes/11.3.0.html +++ b/docs/relnotes/11.3.0.html @@ -49,6 +49,7 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_shader_image_load_store on radeonsi, softpipe
  • GL_ARB_shader_image_size on radeonsi
  • GL_ATI_fragment_shader on all Gallium drivers
  • +GL_OES_draw_buffers_indexed and GL_EXT_draw_buffers_indexed on all drivers that support GL_ARB_draw_buffers_blend
  • GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp
  • GL_OES_shader_image_atomic on all drivers that support GL_ARB_shader_image_load_store
  • diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index 471f6b6e565..3b2c15ebf5c 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -823,6 +823,59 @@
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -937,6 +990,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index 1b003609eee..7c36b1e1d0b 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -192,6 +192,7 @@ EXT(EXT_depth_bounds_test , EXT_depth_bounds_test EXT(EXT_discard_framebuffer , dummy_true , x , x , ES1, ES2, 2009) EXT(EXT_draw_buffers , dummy_true , x , x , x , ES2, 2012) EXT(EXT_draw_buffers2 , EXT_draw_buffers2 , GLL, GLC, x , x , 2006) +EXT(EXT_draw_buffers_indexed , ARB_draw_buffers_blend , x , x , x , 30, 2014) EXT(EXT_draw_elements_base_vertex , ARB_draw_elements_base_vertex , x , x , x , ES2, 2014) EXT(EXT_draw_instanced , ARB_draw_instanced , GLL, GLC, x , x , 2006) EXT(EXT_draw_range_elements , dummy_true , GLL, x , x , x , 1997) @@ -315,6 +316,7 @@ EXT(OES_depth24 , dummy_true EXT(OES_depth32 , dummy_false , x , x , x , x , 2005) EXT(OES_depth_texture , ARB_depth_texture , x , x , x , ES2, 2006) EXT(OES_depth_texture_cube_map , OES_depth_texture_cube_map , x , x , x , ES2, 2012) +EXT(OES_draw_buffers_indexed , ARB_draw_buffers_blend , x , x , x , 30, 2014) EXT(OES_draw_elements_base_vertex , ARB_draw_elements_base_vertex , x , x , x , ES2, 2014) EXT(OES_draw_texture , OES_draw_texture , x , x , ES1, x , 2004) EXT(OES_element_index_uint , dummy_true , x , x , ES1, ES2, 2005) diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index c3aa7106b33..9f278be47ca 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -2460,6 +2460,16 @@ const struct function gles3_functions_possible[] = { /* GL_OES_copy_image */ { "glCopyImageSubDataOES", 30, -1 }, + /* GL_OES_draw_buffers_indexed */ + { "glBlendFunciOES", 30, -1 }, + { 
"glBlendFuncSeparateiOES", 30, -1 }, + { "glBlendEquationiOES", 30, -1 }, + { "glBlendEquationSeparateiOES", 30, -1 }, + { "glColorMaskiOES", 30, -1 }, + { "glEnableiOES", 30, -1 }, + { "glDisableiOES", 30, -1 }, + { "glIsEnablediOES", 30, -1 }, + { NULL, 0, -1 } }; -- cgit v1.2.3 From 2d9e0f24e1a13648a9bceb03dbfb438e03c81fd7 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 2 Feb 2016 14:45:07 -0600 Subject: Android: fix x86 gallium builds Builds with gallium enabled fail on x86 with linker error: external/mesa3d/src/mesa/vbo/vbo_exec_array.c:127: error: undefined reference to '_mesa_uint_array_min_max' The problem is sse_minmax.c is not included in the libmesa_st_mesa library. Since the SSE4.1 files are needed for both libmesa_st_mesa and libmesa_dricore, move SSE4.1 files into a separate static library that can be used by both. Cc: "11.1 11.2" Signed-off-by: Rob Herring Reviewed-by: Emil Velikov --- src/mesa/Android.libmesa_dricore.mk | 7 +++--- src/mesa/Android.libmesa_sse41.mk | 44 +++++++++++++++++++++++++++++++++++++ src/mesa/Android.libmesa_st_mesa.mk | 4 +++- src/mesa/Android.mk | 1 + src/mesa/Makefile.sources | 4 ++++ 5 files changed, 55 insertions(+), 5 deletions(-) create mode 100644 src/mesa/Android.libmesa_sse41.mk (limited to 'src/mesa') diff --git a/src/mesa/Android.libmesa_dricore.mk b/src/mesa/Android.libmesa_dricore.mk index a3e6c6d55ae..d7647a76bd0 100644 --- a/src/mesa/Android.libmesa_dricore.mk +++ b/src/mesa/Android.libmesa_dricore.mk @@ -48,9 +48,8 @@ endif # x86 endif # MESA_ENABLE_ASM ifeq ($(ARCH_X86_HAVE_SSE4_1),true) -LOCAL_SRC_FILES += \ - main/streaming-load-memcpy.c \ - main/sse_minmax.c +LOCAL_WHOLE_STATIC_LIBRARIES := \ + libmesa_sse41 LOCAL_CFLAGS := \ -msse4.1 \ -DUSE_SSE41 @@ -63,7 +62,7 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary -LOCAL_WHOLE_STATIC_LIBRARIES := \ +LOCAL_WHOLE_STATIC_LIBRARIES += \ libmesa_program include $(LOCAL_PATH)/Android.gen.mk diff --git 
a/src/mesa/Android.libmesa_sse41.mk b/src/mesa/Android.libmesa_sse41.mk new file mode 100644 index 00000000000..8562da60193 --- /dev/null +++ b/src/mesa/Android.libmesa_sse41.mk @@ -0,0 +1,44 @@ +# Copyright 2012 Intel Corporation +# Copyright (C) 2010-2011 Chia-I Wu +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +ifeq ($(ARCH_X86_HAVE_SSE4_1),true) + +LOCAL_PATH := $(call my-dir) + +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_MODULE := libmesa_sse41 + +LOCAL_SRC_FILES += \ + $(X86_SSE41_FILES) + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/gallium/auxiliary + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) + +endif diff --git a/src/mesa/Android.libmesa_st_mesa.mk b/src/mesa/Android.libmesa_st_mesa.mk index 9fd9460a5ba..bbd39562785 100644 --- a/src/mesa/Android.libmesa_st_mesa.mk +++ b/src/mesa/Android.libmesa_st_mesa.mk @@ -47,6 +47,8 @@ endif # x86 endif # MESA_ENABLE_ASM ifeq ($(ARCH_X86_HAVE_SSE4_1),true) +LOCAL_WHOLE_STATIC_LIBRARIES := \ + libmesa_sse41 LOCAL_CFLAGS := \ -DUSE_SSE41 endif @@ -58,7 +60,7 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/gallium/auxiliary \ $(MESA_TOP)/src/gallium/include -LOCAL_WHOLE_STATIC_LIBRARIES := \ +LOCAL_WHOLE_STATIC_LIBRARIES += \ libmesa_program include $(LOCAL_PATH)/Android.gen.mk diff --git a/src/mesa/Android.mk b/src/mesa/Android.mk index 20f781948be..9a1aef8b28e 100644 --- a/src/mesa/Android.mk +++ b/src/mesa/Android.mk @@ -24,5 +24,6 @@ include $(LOCAL_PATH)/Android.mesa_gen_matypes.mk include $(LOCAL_PATH)/Android.libmesa_glsl_utils.mk include $(LOCAL_PATH)/Android.libmesa_dricore.mk include $(LOCAL_PATH)/Android.libmesa_st_mesa.mk +include $(LOCAL_PATH)/Android.libmesa_sse41.mk include $(LOCAL_PATH)/program/Android.mk diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 54601a956fd..7425f01273d 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -587,6 +587,10 @@ X86_64_FILES = \ x86-64/x86-64.h \ x86-64/xform4.S +X86_SSE41_FILES = \ + main/streaming-load-memcpy.c \ + main/sse_minmax.c + SPARC_FILES = \ sparc/sparc.h \ sparc/sparc_clip.S \ -- cgit v1.2.3 From 14c46954c910efb1db94a068a866c7259deaa9d9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Mar 2016 13:57:15 -0700 
Subject: i965: Add an implementation of nir_op_fquantize2f16 Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 28 ++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 25 +++++++++++++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 4de559941ce..7839428c52e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -993,6 +993,34 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; + case nir_op_fquantize2f16: { + fs_reg tmp16 = bld.vgrf(BRW_REGISTER_TYPE_D); + fs_reg tmp32 = bld.vgrf(BRW_REGISTER_TYPE_F); + fs_reg zero = bld.vgrf(BRW_REGISTER_TYPE_F); + + /* The destination stride must be at least as big as the source stride. */ + tmp16.type = BRW_REGISTER_TYPE_W; + tmp16.stride = 2; + + /* Check for denormal */ + fs_reg abs_src0 = op[0]; + abs_src0.abs = true; + bld.CMP(bld.null_reg_f(), abs_src0, brw_imm_f(ldexpf(1.0, -14)), + BRW_CONDITIONAL_L); + /* Get the appropriately signed zero */ + bld.AND(retype(zero, BRW_REGISTER_TYPE_UD), + retype(op[0], BRW_REGISTER_TYPE_UD), + brw_imm_ud(0x80000000)); + /* Do the actual F32 -> F16 -> F32 conversion */ + bld.emit(BRW_OPCODE_F32TO16, tmp16, op[0]); + bld.emit(BRW_OPCODE_F16TO32, tmp32, tmp16); + /* Select that or zero based on normal status */ + inst = bld.SEL(result, zero, tmp32); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->saturate = instr->dest.saturate; + break; + } + case nir_op_fmin: case nir_op_imin: case nir_op_umin: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index eef3940b643..ee6929b16a2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1188,6 +1188,31 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) 
inst->saturate = instr->dest.saturate; break; + case nir_op_fquantize2f16: { + /* See also vec4_visitor::emit_pack_half_2x16() */ + src_reg tmp16 = src_reg(this, glsl_type::uvec4_type); + src_reg tmp32 = src_reg(this, glsl_type::vec4_type); + src_reg zero = src_reg(this, glsl_type::vec4_type); + + /* Check for denormal */ + src_reg abs_src0 = op[0]; + abs_src0.abs = true; + emit(CMP(dst_null_f(), abs_src0, brw_imm_f(ldexpf(1.0, -14)), + BRW_CONDITIONAL_L)); + /* Get the appropriately signed zero */ + emit(AND(retype(dst_reg(zero), BRW_REGISTER_TYPE_UD), + retype(op[0], BRW_REGISTER_TYPE_UD), + brw_imm_ud(0x80000000))); + /* Do the actual F32 -> F16 -> F32 conversion */ + emit(F32TO16(dst_reg(tmp16), op[0])); + emit(F16TO32(dst_reg(tmp32), tmp16)); + /* Select that or zero based on normal status */ + inst = emit(BRW_OPCODE_SEL, dst, zero, tmp32); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->saturate = instr->dest.saturate; + break; + } + case nir_op_fmin: case nir_op_imin: case nir_op_umin: -- cgit v1.2.3