diff options
94 files changed, 1786 insertions, 1115 deletions
diff --git a/docs/GL3.txt b/docs/GL3.txt index 5b6dc89e250..c48802a9f7b 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -172,7 +172,7 @@ GL 4.3, GLSL 4.30: GL_KHR_debug DONE (all drivers) GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL) GL_ARB_fragment_layer_viewport DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe) - GL_ARB_framebuffer_no_attachments DONE (i965) + GL_ARB_framebuffer_no_attachments DONE (i965, r600, radeonsi) GL_ARB_internalformat_query2 DONE (all drivers) GL_ARB_invalidate_subdata DONE (all drivers) GL_ARB_multi_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe) diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html index 6f8fcfbde29..d56f6553fe9 100644 --- a/docs/relnotes/11.3.0.html +++ b/docs/relnotes/11.3.0.html @@ -44,6 +44,7 @@ Note: some of the new features are only available with certain drivers. </p> <ul> +<li>GL_ARB_framebuffer_no_attachments on r600, radeonsi</li> <li>GL_ARB_internalformat_query2 on all drivers</li> <li>GL_ARB_shader_atomic_counter_ops on nvc0</li> <li>GL_ARB_shader_image_load_store on radeonsi, softpipe</li> @@ -53,6 +54,7 @@ Note: some of the new features are only available with certain drivers. 
<li>GL_OES_draw_buffers_indexed and GL_EXT_draw_buffers_indexed on all drivers that support GL_ARB_draw_buffers_blend</li> <li>GL_OES_shader_image_atomic on all drivers that support GL_ARB_shader_image_load_store</li> <li>GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp</li> +<li>EGL_KHR_reusable_sync on all drivers</li> </ul> <h2>Bug fixes</h2> diff --git a/src/compiler/glsl/link_uniform_blocks.cpp b/src/compiler/glsl/link_uniform_blocks.cpp index c8fa181a15d..58f22fd61c5 100644 --- a/src/compiler/glsl/link_uniform_blocks.cpp +++ b/src/compiler/glsl/link_uniform_blocks.cpp @@ -216,7 +216,7 @@ process_block_array(struct uniform_block_array_elements *ub_array, char **name, { if (ub_array) { for (unsigned j = 0; j < ub_array->num_array_elements; j++) { - size_t new_length = name_length; + size_t new_length = name_length; /* Append the subscript to the current variable name */ ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", @@ -261,7 +261,6 @@ process_block_array(struct uniform_block_array_elements *ub_array, char **name, } blocks[i].NumUniforms = (unsigned)(ptrdiff_t)(&variables[parcel->index] - blocks[i].Uniforms); - blocks[i].IsShaderStorage = b->is_shader_storage; *block_index = *block_index + 1; *binding_offset = *binding_offset + 1; @@ -291,13 +290,105 @@ resize_block_array(const glsl_type *type, } } -unsigned +static void +create_buffer_blocks(void *mem_ctx, struct gl_context *ctx, + struct gl_shader_program *prog, + struct gl_uniform_block **out_blks, unsigned num_blocks, + struct hash_table *block_hash, unsigned num_variables, + bool create_ubo_blocks) +{ + if (num_blocks == 0) { + assert(num_variables == 0); + return; + } + + assert(num_variables != 0); + + /* Allocate storage to hold all of the information related to uniform + * blocks that can be queried through the API. 
+ */ + struct gl_uniform_block *blocks = ralloc_array(mem_ctx, gl_uniform_block, num_blocks); + gl_uniform_buffer_variable *variables = + ralloc_array(blocks, gl_uniform_buffer_variable, num_variables); + + /* Add each variable from each uniform block to the API tracking + * structures. + */ + ubo_visitor parcel(blocks, variables, num_variables); + + STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD140) + == unsigned(ubo_packing_std140)); + STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_SHARED) + == unsigned(ubo_packing_shared)); + STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_PACKED) + == unsigned(ubo_packing_packed)); + STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD430) + == unsigned(ubo_packing_std430)); + + unsigned i = 0; + struct hash_entry *entry; + hash_table_foreach (block_hash, entry) { + const struct link_uniform_block_active *const b = + (const struct link_uniform_block_active *) entry->data; + const glsl_type *block_type = b->type; + + if ((create_ubo_blocks && !b->is_shader_storage) || + (!create_ubo_blocks && b->is_shader_storage)) { + + if (b->array != NULL) { + unsigned binding_offset = 0; + char *name = ralloc_strdup(NULL, + block_type->without_array()->name); + size_t name_length = strlen(name); + + assert(b->has_instance_name); + process_block_array(b->array, &name, name_length, blocks, &parcel, + variables, b, &i, &binding_offset, ctx, prog); + ralloc_free(name); + } else { + blocks[i].Name = ralloc_strdup(blocks, block_type->name); + blocks[i].Uniforms = &variables[parcel.index]; + blocks[i].Binding = (b->has_binding) ? b->binding : 0; + blocks[i].UniformBufferSize = 0; + blocks[i]._Packing = + gl_uniform_block_packing(block_type->interface_packing); + + parcel.process(block_type, + b->has_instance_name ? 
block_type->name : ""); + + blocks[i].UniformBufferSize = parcel.buffer_size; + + /* Check SSBO size is lower than maximum supported size for SSBO + */ + if (b->is_shader_storage && + parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) { + linker_error(prog, "shader storage block `%s' has size %d, " + "which is larger than than the maximum allowed (%d)", + block_type->name, parcel.buffer_size, + ctx->Const.MaxShaderStorageBlockSize); + } + blocks[i].NumUniforms = (unsigned)(ptrdiff_t) + (&variables[parcel.index] - blocks[i].Uniforms); + i++; + } + } + } + + *out_blks = blocks; + + assert(parcel.index == num_variables); +} + +void link_uniform_blocks(void *mem_ctx, struct gl_context *ctx, struct gl_shader_program *prog, struct gl_shader **shader_list, unsigned num_shaders, - struct gl_uniform_block **blocks_ret) + struct gl_uniform_block **ubo_blocks, + unsigned *num_ubo_blocks, + struct gl_uniform_block **ssbo_blocks, + unsigned *num_ssbo_blocks) { /* This hash table will track all of the uniform blocks that have been * encountered. Since blocks with the same block-name must be the same, @@ -310,7 +401,7 @@ link_uniform_blocks(void *mem_ctx, if (block_hash == NULL) { _mesa_error_no_memory(__func__); linker_error(prog, "out of memory\n"); - return 0; + return; } /* Determine which uniform blocks are active. @@ -323,8 +414,8 @@ link_uniform_blocks(void *mem_ctx, /* Count the number of active uniform blocks. Count the total number of * active slots in those uniform blocks. 
*/ - unsigned num_blocks = 0; - unsigned num_variables = 0; + unsigned num_ubo_variables = 0; + unsigned num_ssbo_variables = 0; count_block_size block_size; struct hash_entry *entry; @@ -346,102 +437,36 @@ link_uniform_blocks(void *mem_ctx, if (b->array != NULL) { unsigned aoa_size = b->type->arrays_of_arrays_size(); - num_blocks += aoa_size; - num_variables += aoa_size * block_size.num_active_uniforms; - } else { - num_blocks++; - num_variables += block_size.num_active_uniforms; - } - - } - - if (num_blocks == 0) { - assert(num_variables == 0); - _mesa_hash_table_destroy(block_hash, NULL); - return 0; - } - - assert(num_variables != 0); - - /* Allocate storage to hold all of the informatation related to uniform - * blocks that can be queried through the API. - */ - gl_uniform_block *blocks = - ralloc_array(mem_ctx, gl_uniform_block, num_blocks); - gl_uniform_buffer_variable *variables = - ralloc_array(blocks, gl_uniform_buffer_variable, num_variables); - - /* Add each variable from each uniform block to the API tracking - * structures. 
- */ - unsigned i = 0; - ubo_visitor parcel(blocks, variables, num_variables); - - STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD140) - == unsigned(ubo_packing_std140)); - STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_SHARED) - == unsigned(ubo_packing_shared)); - STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_PACKED) - == unsigned(ubo_packing_packed)); - STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD430) - == unsigned(ubo_packing_std430)); - - hash_table_foreach (block_hash, entry) { - const struct link_uniform_block_active *const b = - (const struct link_uniform_block_active *) entry->data; - const glsl_type *block_type = b->type; - - if (b->array != NULL) { - unsigned binding_offset = 0; - char *name = ralloc_strdup(NULL, block_type->without_array()->name); - size_t name_length = strlen(name); - - assert(b->has_instance_name); - process_block_array(b->array, &name, name_length, blocks, &parcel, - variables, b, &i, &binding_offset, ctx, prog); - ralloc_free(name); + if (b->is_shader_storage) { + *num_ssbo_blocks += aoa_size; + num_ssbo_variables += aoa_size * block_size.num_active_uniforms; + } else { + *num_ubo_blocks += aoa_size; + num_ubo_variables += aoa_size * block_size.num_active_uniforms; + } } else { - blocks[i].Name = ralloc_strdup(blocks, block_type->name); - blocks[i].Uniforms = &variables[parcel.index]; - blocks[i].Binding = (b->has_binding) ? b->binding : 0; - blocks[i].UniformBufferSize = 0; - blocks[i]._Packing = - gl_uniform_block_packing(block_type->interface_packing); - - parcel.process(block_type, - b->has_instance_name ? 
block_type->name : ""); - - blocks[i].UniformBufferSize = parcel.buffer_size; - - /* Check SSBO size is lower than maximum supported size for SSBO */ - if (b->is_shader_storage && - parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) { - linker_error(prog, "shader storage block `%s' has size %d, " - "which is larger than than the maximum allowed (%d)", - block_type->name, - parcel.buffer_size, - ctx->Const.MaxShaderStorageBlockSize); + if (b->is_shader_storage) { + (*num_ssbo_blocks)++; + num_ssbo_variables += block_size.num_active_uniforms; + } else { + (*num_ubo_blocks)++; + num_ubo_variables += block_size.num_active_uniforms; } - blocks[i].NumUniforms = - (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms); - - blocks[i].IsShaderStorage = b->is_shader_storage; - - i++; } + } - assert(parcel.index == num_variables); + create_buffer_blocks(mem_ctx, ctx, prog, ubo_blocks, *num_ubo_blocks, + block_hash, num_ubo_variables, true); + create_buffer_blocks(mem_ctx, ctx, prog, ssbo_blocks, *num_ssbo_blocks, + block_hash, num_ssbo_variables, false); _mesa_hash_table_destroy(block_hash, NULL); - - *blocks_ret = blocks; - return num_blocks; } bool link_uniform_blocks_are_compatible(const gl_uniform_block *a, - const gl_uniform_block *b) + const gl_uniform_block *b) { assert(strcmp(a->Name, b->Name) == 0); @@ -464,13 +489,13 @@ link_uniform_blocks_are_compatible(const gl_uniform_block *a, for (unsigned i = 0; i < a->NumUniforms; i++) { if (strcmp(a->Uniforms[i].Name, b->Uniforms[i].Name) != 0) - return false; + return false; if (a->Uniforms[i].Type != b->Uniforms[i].Type) - return false; + return false; if (a->Uniforms[i].RowMajor != b->Uniforms[i].RowMajor) - return false; + return false; } return true; diff --git a/src/compiler/glsl/link_uniform_initializers.cpp b/src/compiler/glsl/link_uniform_initializers.cpp index e5edf2e72e4..c6346d573ab 100644 --- a/src/compiler/glsl/link_uniform_initializers.cpp +++ 
b/src/compiler/glsl/link_uniform_initializers.cpp @@ -154,11 +154,17 @@ set_opaque_binding(void *mem_ctx, gl_shader_program *prog, } void -set_block_binding(gl_shader_program *prog, const char *block_name, int binding) +set_block_binding(gl_shader_program *prog, const char *block_name, + unsigned mode, int binding) { - for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) { - if (!strcmp(prog->BufferInterfaceBlocks[i].Name, block_name)) { - prog->BufferInterfaceBlocks[i].Binding = binding; + unsigned num_blocks = mode == ir_var_uniform ? prog->NumUniformBlocks : + prog->NumShaderStorageBlocks; + struct gl_uniform_block *blks = mode == ir_var_uniform ? + prog->UniformBlocks : prog->ShaderStorageBlocks; + + for (unsigned i = 0; i < num_blocks; i++) { + if (!strcmp(blks[i].Name, block_name)) { + blks[i].Binding = binding; return; } } @@ -308,11 +314,12 @@ link_set_uniform_initializers(struct gl_shader_program *prog, * each subsequent element takes the next consecutive * uniform block binding point." */ - linker::set_block_binding(prog, name, + linker::set_block_binding(prog, name, var->data.mode, var->data.binding + i); } } else { linker::set_block_binding(prog, iface_type->name, + var->data.mode, var->data.binding); } } else if (type->contains_atomic()) { diff --git a/src/compiler/glsl/link_uniforms.cpp b/src/compiler/glsl/link_uniforms.cpp index 7d8a4b4fb79..8db60a36f16 100644 --- a/src/compiler/glsl/link_uniforms.cpp +++ b/src/compiler/glsl/link_uniforms.cpp @@ -462,7 +462,7 @@ public: buffer_block_index = -1; if (var->is_in_buffer_block()) { - struct gl_uniform_block **blks = var->is_in_shader_storage_block() ? + struct gl_uniform_block *blks = var->is_in_shader_storage_block() ? prog->ShaderStorageBlocks : prog->UniformBlocks; unsigned num_blks = var->is_in_shader_storage_block() ? 
prog->NumShaderStorageBlocks : prog->NumUniformBlocks; @@ -471,15 +471,15 @@ public: unsigned l = strlen(var->get_interface_type()->name); for (unsigned i = 0; i < num_blks; i++) { - if (strncmp(var->get_interface_type()->name, blks[i]->Name, l) - == 0 && blks[i]->Name[l] == '[') { + if (strncmp(var->get_interface_type()->name, blks[i].Name, l) + == 0 && blks[i].Name[l] == '[') { buffer_block_index = i; break; } } } else { for (unsigned i = 0; i < num_blks; i++) { - if (strcmp(var->get_interface_type()->name, blks[i]->Name) == + if (strcmp(var->get_interface_type()->name, blks[i].Name) == 0) { buffer_block_index = i; break; @@ -500,7 +500,7 @@ public: var->get_interface_type()->name); } else { const struct gl_uniform_block *const block = - blks[buffer_block_index]; + &blks[buffer_block_index]; assert(var->data.location != -1); @@ -960,11 +960,16 @@ link_update_uniform_buffer_variables(struct gl_shader *shader) sentinel = '['; } + unsigned num_blocks = var->data.mode == ir_var_uniform ? + shader->NumUniformBlocks : shader->NumShaderStorageBlocks; + struct gl_uniform_block **blks = var->data.mode == ir_var_uniform ? 
+ shader->UniformBlocks : shader->ShaderStorageBlocks; + const unsigned l = strlen(var->name); - for (unsigned i = 0; i < shader->NumBufferInterfaceBlocks; i++) { - for (unsigned j = 0; j < shader->BufferInterfaceBlocks[i]->NumUniforms; j++) { + for (unsigned i = 0; i < num_blocks; i++) { + for (unsigned j = 0; j < blks[i]->NumUniforms; j++) { if (sentinel) { - const char *begin = shader->BufferInterfaceBlocks[i]->Uniforms[j].Name; + const char *begin = blks[i]->Uniforms[j].Name; const char *end = strchr(begin, sentinel); if (end == NULL) @@ -978,8 +983,7 @@ link_update_uniform_buffer_variables(struct gl_shader *shader) var->data.location = j; break; } - } else if (!strcmp(var->name, - shader->BufferInterfaceBlocks[i]->Uniforms[j].Name)) { + } else if (!strcmp(var->name, blks[i]->Uniforms[j].Name)) { found = true; var->data.location = j; break; @@ -1104,11 +1108,9 @@ link_assign_uniform_locations(struct gl_shader_program *prog, sh->num_uniform_components = uniform_size.num_shader_uniform_components; sh->num_combined_uniform_components = sh->num_uniform_components; - for (unsigned i = 0; i < sh->NumBufferInterfaceBlocks; i++) { - if (!sh->BufferInterfaceBlocks[i]->IsShaderStorage) { - sh->num_combined_uniform_components += - sh->BufferInterfaceBlocks[i]->UniformBufferSize / 4; - } + for (unsigned i = 0; i < sh->NumUniformBlocks; i++) { + sh->num_combined_uniform_components += + sh->UniformBlocks[i]->UniformBufferSize / 4; } } diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index d9a681ccca1..957efe5b55d 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -1165,39 +1165,58 @@ cross_validate_uniforms(struct gl_shader_program *prog) } /** - * Accumulates the array of prog->BufferInterfaceBlocks and checks that all - * definitons of blocks agree on their contents. + * Accumulates the array of buffer blocks and checks that all definitions of + * blocks agree on their contents. 
*/ static bool -interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog) +interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog, + bool validate_ssbo) { int *InterfaceBlockStageIndex[MESA_SHADER_STAGES]; + struct gl_uniform_block *blks = NULL; + unsigned *num_blks = validate_ssbo ? &prog->NumShaderStorageBlocks : + &prog->NumUniformBlocks; - unsigned max_num_uniform_blocks = 0; + unsigned max_num_buffer_blocks = 0; for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (prog->_LinkedShaders[i]) - max_num_uniform_blocks += prog->_LinkedShaders[i]->NumBufferInterfaceBlocks; + if (prog->_LinkedShaders[i]) { + if (validate_ssbo) { + max_num_buffer_blocks += + prog->_LinkedShaders[i]->NumShaderStorageBlocks; + } else { + max_num_buffer_blocks += + prog->_LinkedShaders[i]->NumUniformBlocks; + } + } } for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { struct gl_shader *sh = prog->_LinkedShaders[i]; - InterfaceBlockStageIndex[i] = new int[max_num_uniform_blocks]; - for (unsigned int j = 0; j < max_num_uniform_blocks; j++) + InterfaceBlockStageIndex[i] = new int[max_num_buffer_blocks]; + for (unsigned int j = 0; j < max_num_buffer_blocks; j++) InterfaceBlockStageIndex[i][j] = -1; if (sh == NULL) continue; - for (unsigned int j = 0; j < sh->NumBufferInterfaceBlocks; j++) { - int index = link_cross_validate_uniform_block(prog, - &prog->BufferInterfaceBlocks, - &prog->NumBufferInterfaceBlocks, - sh->BufferInterfaceBlocks[j]); + unsigned sh_num_blocks; + struct gl_uniform_block **sh_blks; + if (validate_ssbo) { + sh_num_blocks = prog->_LinkedShaders[i]->NumShaderStorageBlocks; + sh_blks = sh->ShaderStorageBlocks; + } else { + sh_num_blocks = prog->_LinkedShaders[i]->NumUniformBlocks; + sh_blks = sh->UniformBlocks; + } + + for (unsigned int j = 0; j < sh_num_blocks; j++) { + int index = link_cross_validate_uniform_block(prog, &blks, num_blks, + sh_blks[j]); if (index == -1) { - linker_error(prog, "uniform block `%s' has mismatching 
definitions\n", - sh->BufferInterfaceBlocks[j]->Name); + linker_error(prog, "buffer block `%s' has mismatching " + "definitions\n", sh_blks[j]->Name); for (unsigned k = 0; k <= i; k++) { delete[] InterfaceBlockStageIndex[k]; @@ -1213,16 +1232,18 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog) * FIXME: We should be able to free the per stage blocks here. */ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) { + for (unsigned j = 0; j < *num_blks; j++) { int stage_index = InterfaceBlockStageIndex[i][j]; if (stage_index != -1) { struct gl_shader *sh = prog->_LinkedShaders[i]; - prog->BufferInterfaceBlocks[j].stageref |= (1 << i); + blks[j].stageref |= (1 << i); + + struct gl_uniform_block **sh_blks = validate_ssbo ? + sh->ShaderStorageBlocks : sh->UniformBlocks; - sh->BufferInterfaceBlocks[stage_index] = - &prog->BufferInterfaceBlocks[j]; + sh_blks[stage_index] = &blks[j]; } } } @@ -1231,6 +1252,11 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog) delete[] InterfaceBlockStageIndex[i]; } + if (validate_ssbo) + prog->ShaderStorageBlocks = blks; + else + prog->UniformBlocks = blks; + return true; } @@ -2074,7 +2100,10 @@ link_intrastage_shaders(void *mem_ctx, struct gl_shader **shader_list, unsigned num_shaders) { - struct gl_uniform_block *uniform_blocks = NULL; + struct gl_uniform_block *ubo_blocks = NULL; + struct gl_uniform_block *ssbo_blocks = NULL; + unsigned num_ubo_blocks = 0; + unsigned num_ssbo_blocks = 0; /* Check that global variables defined in multiple shaders are consistent. */ @@ -2090,9 +2119,10 @@ link_intrastage_shaders(void *mem_ctx, return NULL; /* Link up uniform blocks defined within this stage. 
*/ - const unsigned num_uniform_blocks = - link_uniform_blocks(mem_ctx, ctx, prog, shader_list, num_shaders, - &uniform_blocks); + link_uniform_blocks(mem_ctx, ctx, prog, shader_list, num_shaders, + &ubo_blocks, &num_ubo_blocks, &ssbo_blocks, + &num_ssbo_blocks); + if (!prog->LinkStatus) return NULL; @@ -2159,15 +2189,23 @@ link_intrastage_shaders(void *mem_ctx, linked->ir = new(linked) exec_list; clone_ir_list(mem_ctx, linked->ir, main->ir); - linked->BufferInterfaceBlocks = - ralloc_array(linked, gl_uniform_block *, num_uniform_blocks); - - ralloc_steal(linked, uniform_blocks); - for (unsigned i = 0; i < num_uniform_blocks; i++) { - linked->BufferInterfaceBlocks[i] = &uniform_blocks[i]; + /* Copy ubo blocks to linked shader list */ + linked->UniformBlocks = + ralloc_array(linked, gl_uniform_block *, num_ubo_blocks); + ralloc_steal(linked, ubo_blocks); + for (unsigned i = 0; i < num_ubo_blocks; i++) { + linked->UniformBlocks[i] = &ubo_blocks[i]; } + linked->NumUniformBlocks = num_ubo_blocks; - linked->NumBufferInterfaceBlocks = num_uniform_blocks; + /* Copy ssbo blocks to linked shader list */ + linked->ShaderStorageBlocks = + ralloc_array(linked, gl_uniform_block *, num_ssbo_blocks); + ralloc_steal(linked, ssbo_blocks); + for (unsigned i = 0; i < num_ssbo_blocks; i++) { + linked->ShaderStorageBlocks[i] = &ssbo_blocks[i]; + } + linked->NumShaderStorageBlocks = num_ssbo_blocks; link_fs_input_layout_qualifiers(prog, linked, shader_list, num_shaders); link_tcs_out_layout_qualifiers(prog, linked, shader_list, num_shaders); @@ -2973,21 +3011,22 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog) ctx->Const.MaxCombinedShaderStorageBlocks); } - for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) { - /* Don't check SSBOs for Uniform Block Size */ - if (!prog->BufferInterfaceBlocks[i].IsShaderStorage && - prog->BufferInterfaceBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) { + for (unsigned i = 0; i < prog->NumUniformBlocks; 
i++) { + if (prog->UniformBlocks[i].UniformBufferSize > + ctx->Const.MaxUniformBlockSize) { linker_error(prog, "Uniform block %s too big (%d/%d)\n", - prog->BufferInterfaceBlocks[i].Name, - prog->BufferInterfaceBlocks[i].UniformBufferSize, + prog->UniformBlocks[i].Name, + prog->UniformBlocks[i].UniformBufferSize, ctx->Const.MaxUniformBlockSize); } + } - if (prog->BufferInterfaceBlocks[i].IsShaderStorage && - prog->BufferInterfaceBlocks[i].UniformBufferSize > ctx->Const.MaxShaderStorageBlockSize) { + for (unsigned i = 0; i < prog->NumShaderStorageBlocks; i++) { + if (prog->ShaderStorageBlocks[i].UniformBufferSize > + ctx->Const.MaxShaderStorageBlockSize) { linker_error(prog, "Shader storage block %s too big (%d/%d)\n", - prog->BufferInterfaceBlocks[i].Name, - prog->BufferInterfaceBlocks[i].UniformBufferSize, + prog->ShaderStorageBlocks[i].Name, + prog->ShaderStorageBlocks[i].UniformBufferSize, ctx->Const.MaxShaderStorageBlockSize); } } @@ -3295,8 +3334,8 @@ should_add_buffer_variable(struct gl_shader_program *shProg, if (type != GL_BUFFER_VARIABLE) return true; - for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { - const char *block_name = shProg->BufferInterfaceBlocks[i].Name; + for (unsigned i = 0; i < shProg->NumShaderStorageBlocks; i++) { + const char *block_name = shProg->ShaderStorageBlocks[i].Name; block_name_len = strlen(block_name); const char *block_square_bracket = strchr(block_name, '['); @@ -3805,8 +3844,8 @@ calculate_array_size_and_stride(struct gl_shader_program *shProg, char *var_name = get_top_level_name(uni->name); char *interface_name = get_top_level_name(uni->is_shader_storage ? 
- shProg->ShaderStorageBlocks[block_index]->Name : - shProg->UniformBlocks[block_index]->Name); + shProg->ShaderStorageBlocks[block_index].Name : + shProg->UniformBlocks[block_index].Name); if (strcmp(var_name, interface_name) == 0) { /* Deal with instanced array of SSBOs */ @@ -3947,8 +3986,8 @@ build_program_resource_list(struct gl_context *ctx, int block_index = shProg->UniformStorage[i].block_index; if (block_index != -1) { stageref |= is_shader_storage ? - shProg->ShaderStorageBlocks[block_index]->stageref : - shProg->UniformBlocks[block_index]->stageref; + shProg->ShaderStorageBlocks[block_index].stageref : + shProg->UniformBlocks[block_index].stageref; } GLenum type = is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM; @@ -3965,12 +4004,17 @@ build_program_resource_list(struct gl_context *ctx, return; } - /* Add program uniform blocks and shader storage blocks. */ - for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { - bool is_shader_storage = shProg->BufferInterfaceBlocks[i].IsShaderStorage; - GLenum type = is_shader_storage ? GL_SHADER_STORAGE_BLOCK : GL_UNIFORM_BLOCK; - if (!add_program_resource(shProg, type, - &shProg->BufferInterfaceBlocks[i], 0)) + /* Add program uniform blocks. */ + for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) { + if (!add_program_resource(shProg, GL_UNIFORM_BLOCK, + &shProg->UniformBlocks[i], 0)) + return; + } + + /* Add program shader storage blocks. 
*/ + for (unsigned i = 0; i < shProg->NumShaderStorageBlocks; i++) { + if (!add_program_resource(shProg, GL_SHADER_STORAGE_BLOCK, + &shProg->ShaderStorageBlocks[i], 0)) return; } @@ -4116,49 +4160,6 @@ link_assign_subroutine_types(struct gl_shader_program *prog) } static void -split_ubos_and_ssbos(void *mem_ctx, - struct gl_uniform_block **s_blks, - struct gl_uniform_block *p_blks, - unsigned num_blocks, - struct gl_uniform_block ***ubos, - unsigned *num_ubos, - struct gl_uniform_block ***ssbos, - unsigned *num_ssbos) -{ - unsigned num_ubo_blocks = 0; - unsigned num_ssbo_blocks = 0; - - /* Are we spliting the list of blocks for the shader or the program */ - bool is_shader = p_blks == NULL; - - for (unsigned i = 0; i < num_blocks; i++) { - if (is_shader ? s_blks[i]->IsShaderStorage : p_blks[i].IsShaderStorage) - num_ssbo_blocks++; - else - num_ubo_blocks++; - } - - *ubos = ralloc_array(mem_ctx, gl_uniform_block *, num_ubo_blocks); - *num_ubos = 0; - - *ssbos = ralloc_array(mem_ctx, gl_uniform_block *, num_ssbo_blocks); - *num_ssbos = 0; - - for (unsigned i = 0; i < num_blocks; i++) { - struct gl_uniform_block *blk = is_shader ? 
s_blks[i] : &p_blks[i]; - if (blk->IsShaderStorage) { - (*ssbos)[*num_ssbos] = blk; - (*num_ssbos)++; - } else { - (*ubos)[*num_ubos] = blk; - (*num_ubos)++; - } - } - - assert(*num_ubos + *num_ssbos == num_blocks); -} - -static void set_always_active_io(exec_list *ir, ir_variable_mode io_mode) { assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out); @@ -4498,7 +4499,12 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (prog->SeparateShader) disable_varying_optimizations_for_sso(prog); - if (!interstage_cross_validate_uniform_blocks(prog)) + /* Process UBOs */ + if (!interstage_cross_validate_uniform_blocks(prog, false)) + goto done; + + /* Process SSBOs */ + if (!interstage_cross_validate_uniform_blocks(prog, true)) goto done; /* Do common optimization before assigning storage for attributes, @@ -4695,33 +4701,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) has_xfb_qualifiers)) goto done; - /* Split BufferInterfaceBlocks into UniformBlocks and ShaderStorageBlocks - * for gl_shader_program and gl_shader, so that drivers that need separate - * index spaces for each set can have that. 
- */ - for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++) { - if (prog->_LinkedShaders[i] != NULL) { - gl_shader *sh = prog->_LinkedShaders[i]; - split_ubos_and_ssbos(sh, - sh->BufferInterfaceBlocks, - NULL, - sh->NumBufferInterfaceBlocks, - &sh->UniformBlocks, - &sh->NumUniformBlocks, - &sh->ShaderStorageBlocks, - &sh->NumShaderStorageBlocks); - } - } - - split_ubos_and_ssbos(prog, - NULL, - prog->BufferInterfaceBlocks, - prog->NumBufferInterfaceBlocks, - &prog->UniformBlocks, - &prog->NumUniformBlocks, - &prog->ShaderStorageBlocks, - &prog->NumShaderStorageBlocks); - update_array_sizes(prog); link_assign_uniform_locations(prog, ctx->Const.UniformBooleanTrue, num_explicit_uniform_locs, diff --git a/src/compiler/glsl/linker.h b/src/compiler/glsl/linker.h index 97144df8ff7..3a0ec8b35d3 100644 --- a/src/compiler/glsl/linker.h +++ b/src/compiler/glsl/linker.h @@ -53,13 +53,16 @@ extern bool link_uniform_blocks_are_compatible(const gl_uniform_block *a, const gl_uniform_block *b); -extern unsigned +extern void link_uniform_blocks(void *mem_ctx, struct gl_context *ctx, struct gl_shader_program *prog, struct gl_shader **shader_list, unsigned num_shaders, - struct gl_uniform_block **blocks_ret); + struct gl_uniform_block **ubo_blocks, + unsigned *num_ubo_blocks, + struct gl_uniform_block **ssbo_blocks, + unsigned *num_ssbo_blocks); bool validate_intrastage_arrays(struct gl_shader_program *prog, diff --git a/src/compiler/glsl/lower_ubo_reference.cpp b/src/compiler/glsl/lower_ubo_reference.cpp index 3155ab6225e..1a0140fad15 100644 --- a/src/compiler/glsl/lower_ubo_reference.cpp +++ b/src/compiler/glsl/lower_ubo_reference.cpp @@ -372,8 +372,7 @@ lower_ubo_reference_visitor::ubo_load(void *mem_ctx, static bool shader_storage_buffer_object(const _mesa_glsl_parse_state *state) { - return state->ARB_shader_storage_buffer_object_enable || - state->is_version(430, 310); + return state->has_shader_storage_buffer_objects(); } uint32_t diff --git 
a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp index 278d5450bfb..fcb12d1b77d 100644 --- a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp @@ -385,6 +385,26 @@ public: case ir_var_const_in: return this->lower_temps; + case ir_var_system_value: + /* There are only a few system values that have array types: + * + * gl_TessLevelInner[] + * gl_TessLevelOuter[] + * gl_SampleMaskIn[] + * + * The tessellation factor arrays are lowered to vec4/vec2s + * by lower_tess_level() before this pass occurs, so we'll + * never see them here. + * + * The only remaining case is gl_SampleMaskIn[], which has + * a length of ceil(ctx->Const.MaxSamples / 32). Most hardware + * supports no more than 32 samples, in which case our lowering + * produces a single read of gl_SampleMaskIn[0]. Even with 64x + * MSAA, the array length is only 2, so the lowering is fairly + * efficient. Therefore, lower unconditionally. + */ + return true; + case ir_var_shader_in: /* The input array size is unknown at compiler time for non-patch * inputs in TCS and TES. 
The arrays are sized to diff --git a/src/compiler/glsl/standalone_scaffolding.cpp b/src/compiler/glsl/standalone_scaffolding.cpp index 49b4a26dc12..09d7d6e8c26 100644 --- a/src/compiler/glsl/standalone_scaffolding.cpp +++ b/src/compiler/glsl/standalone_scaffolding.cpp @@ -105,10 +105,6 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg) ralloc_free(shProg->InfoLog); shProg->InfoLog = ralloc_strdup(shProg, ""); - ralloc_free(shProg->BufferInterfaceBlocks); - shProg->BufferInterfaceBlocks = NULL; - shProg->NumBufferInterfaceBlocks = 0; - ralloc_free(shProg->UniformBlocks); shProg->UniformBlocks = NULL; shProg->NumUniformBlocks = 0; diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c index b915101ce32..3a65ab18928 100644 --- a/src/compiler/nir/nir_search.c +++ b/src/compiler/nir/nir_search.c @@ -25,6 +25,7 @@ * */ +#include <inttypes.h> #include "nir_search.h" struct match_state { @@ -494,7 +495,7 @@ construct_value(const nir_search_value *value, break; case nir_type_int: - load->def.name = ralloc_asprintf(load, "%ld", c->data.i); + load->def.name = ralloc_asprintf(load, "%" PRIi64, c->data.i); switch (bitsize->dest_size) { case 32: load->value.i32[0] = c->data.i; @@ -508,7 +509,7 @@ construct_value(const nir_search_value *value, break; case nir_type_uint: - load->def.name = ralloc_asprintf(load, "%lu", c->data.u); + load->def.name = ralloc_asprintf(load, "%" PRIu64, c->data.u); switch (bitsize->dest_size) { case 32: load->value.u32[0] = c->data.u; diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 8f50f0ce573..490b0409c98 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -38,6 +38,8 @@ #include <fcntl.h> #include <errno.h> #include <unistd.h> +#include <c11/threads.h> +#include <time.h> #ifdef HAVE_LIBDRM #include <xf86drm.h> #include <drm_fourcc.h> @@ -623,6 +625,8 @@ dri2_setup_screen(_EGLDisplay *disp) disp->Extensions.KHR_cl_event2 = EGL_TRUE; } + 
disp->Extensions.KHR_reusable_sync = EGL_TRUE; + if (dri2_dpy->image) { if (dri2_dpy->image->base.version >= 10 && dri2_dpy->image->getCapabilities != NULL) { @@ -2394,7 +2398,12 @@ dri2_egl_unref_sync(struct dri2_egl_display *dri2_dpy, struct dri2_egl_sync *dri2_sync) { if (p_atomic_dec_zero(&dri2_sync->refcount)) { - dri2_dpy->fence->destroy_fence(dri2_dpy->dri_screen, dri2_sync->fence); + if (dri2_sync->base.Type == EGL_SYNC_REUSABLE_KHR) + cnd_destroy(&dri2_sync->cond); + + if (dri2_sync->fence) + dri2_dpy->fence->destroy_fence(dri2_dpy->dri_screen, dri2_sync->fence); + free(dri2_sync); } } @@ -2408,6 +2417,8 @@ dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy, struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy); struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); struct dri2_egl_sync *dri2_sync; + EGLint ret; + pthread_condattr_t attr; dri2_sync = calloc(1, sizeof(struct dri2_egl_sync)); if (!dri2_sync) { @@ -2450,6 +2461,37 @@ dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy, dri2_sync->fence, 0, 0)) dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR; break; + + case EGL_SYNC_REUSABLE_KHR: + /* initialize attr */ + ret = pthread_condattr_init(&attr); + + if (ret) { + _eglError(EGL_BAD_ACCESS, "eglCreateSyncKHR"); + free(dri2_sync); + return NULL; + } + + /* change clock attribute to CLOCK_MONOTONIC */ + ret = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); + + if (ret) { + _eglError(EGL_BAD_ACCESS, "eglCreateSyncKHR"); + free(dri2_sync); + return NULL; + } + + ret = pthread_cond_init(&dri2_sync->cond, &attr); + + if (ret) { + _eglError(EGL_BAD_ACCESS, "eglCreateSyncKHR"); + free(dri2_sync); + return NULL; + } + + /* initial status of reusable sync must be "unsignaled" */ + dri2_sync->base.SyncStatus = EGL_UNSIGNALED_KHR; + break; } p_atomic_set(&dri2_sync->refcount, 1); @@ -2461,9 +2503,27 @@ dri2_destroy_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync) { struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy); struct dri2_egl_sync 
*dri2_sync = dri2_egl_sync(sync); + EGLint ret = EGL_TRUE; + EGLint err; + + /* if type of sync is EGL_SYNC_REUSABLE_KHR and it is not signaled yet, + * then unlock all threads possibly blocked by the reusable sync before + * destroying it. + */ + if (dri2_sync->base.Type == EGL_SYNC_REUSABLE_KHR && + dri2_sync->base.SyncStatus == EGL_UNSIGNALED_KHR) { + dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR; + /* unblock all threads currently blocked by sync */ + err = cnd_broadcast(&dri2_sync->cond); + if (err) { + _eglError(EGL_BAD_ACCESS, "eglDestroySyncKHR"); + ret = EGL_FALSE; + } + } dri2_egl_unref_sync(dri2_dpy, dri2_sync); - return EGL_TRUE; + + return ret; } static EGLint @@ -2471,10 +2531,16 @@ dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint flags, EGLTime timeout) { _EGLContext *ctx = _eglGetCurrentContext(); + struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv); struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy); struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); struct dri2_egl_sync *dri2_sync = dri2_egl_sync(sync); unsigned wait_flags = 0; + + /* timespecs for cnd_timedwait */ + struct timespec current; + xtime expire; + EGLint ret = EGL_CONDITION_SATISFIED_KHR; /* The EGL_KHR_fence_sync spec states: @@ -2488,17 +2554,130 @@ dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, /* the sync object should take a reference while waiting */ dri2_egl_ref_sync(dri2_sync); - if (dri2_dpy->fence->client_wait_sync(dri2_ctx ? dri2_ctx->dri_context : NULL, + switch (sync->Type) { + case EGL_SYNC_FENCE_KHR: + case EGL_SYNC_CL_EVENT_KHR: + if (dri2_dpy->fence->client_wait_sync(dri2_ctx ? 
dri2_ctx->dri_context : NULL, dri2_sync->fence, wait_flags, timeout)) - dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR; - else - ret = EGL_TIMEOUT_EXPIRED_KHR; + dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR; + else + ret = EGL_TIMEOUT_EXPIRED_KHR; + break; + + case EGL_SYNC_REUSABLE_KHR: + if (dri2_ctx && dri2_sync->base.SyncStatus == EGL_UNSIGNALED_KHR && + (flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)) { + /* flush context if EGL_SYNC_FLUSH_COMMANDS_BIT_KHR is set */ + if (dri2_drv->glFlush) + dri2_drv->glFlush(); + } + + /* if timeout is EGL_FOREVER_KHR, it should wait without any timeout.*/ + if (timeout == EGL_FOREVER_KHR) { + if (mtx_lock(&dri2_sync->mutex)) { + ret = EGL_FALSE; + goto cleanup; + } + + ret = cnd_wait(&dri2_sync->cond, &dri2_sync->mutex); + if (mtx_unlock(&dri2_sync->mutex)) { + ret = EGL_FALSE; + goto cleanup; + } + + if (ret) { + _eglError(EGL_BAD_PARAMETER, "eglClientWaitSyncKHR"); + ret = EGL_FALSE; + } + } else { + /* if reusable sync has not been yet signaled */ + if (dri2_sync->base.SyncStatus != EGL_SIGNALED_KHR) { + clock_gettime(CLOCK_MONOTONIC, &current); + + /* calculating when to expire */ + expire.nsec = timeout % 1000000000L; + expire.sec = timeout / 1000000000L; + + expire.nsec += current.tv_nsec; + expire.sec += current.tv_sec; + + /* expire.nsec now is a number between 0 and 1999999998 */ + if (expire.nsec > 999999999L) { + expire.sec++; + expire.nsec -= 1000000000L; + } + + if (mtx_lock(&dri2_sync->mutex)) { + ret = EGL_FALSE; + goto cleanup; + } + + ret = cnd_timedwait(&dri2_sync->cond, &dri2_sync->mutex, &expire); + + if (mtx_unlock(&dri2_sync->mutex)) { + ret = EGL_FALSE; + goto cleanup; + } + + if (ret) + if (ret == thrd_busy) { + if (dri2_sync->base.SyncStatus == EGL_UNSIGNALED_KHR) { + ret = EGL_TIMEOUT_EXPIRED_KHR; + } else { + _eglError(EGL_BAD_ACCESS, "eglClientWaitSyncKHR"); + ret = EGL_FALSE; + } + } + } + } + break; + } + + cleanup: dri2_egl_unref_sync(dri2_dpy, dri2_sync); + + if (ret == EGL_FALSE) { + 
_eglError(EGL_BAD_ACCESS, "eglClientWaitSyncKHR"); + return EGL_FALSE; + } + return ret; } +static EGLBoolean +dri2_signal_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, + EGLenum mode) +{ + struct dri2_egl_sync *dri2_sync = dri2_egl_sync(sync); + EGLint ret; + + if (sync->Type != EGL_SYNC_REUSABLE_KHR) { + _eglError(EGL_BAD_MATCH, "eglSignalSyncKHR"); + return EGL_FALSE; + } + + if (mode != EGL_SIGNALED_KHR && mode != EGL_UNSIGNALED_KHR) { + _eglError(EGL_BAD_ATTRIBUTE, "eglSignalSyncKHR"); + return EGL_FALSE; + } + + dri2_sync->base.SyncStatus = mode; + + if (mode == EGL_SIGNALED_KHR) { + ret = cnd_broadcast(&dri2_sync->cond); + + /* fail to broadcast */ + if (ret) { + _eglError(EGL_BAD_ACCESS, "eglSignalSyncKHR"); + return EGL_FALSE; + } + } + + return EGL_TRUE; +} + static EGLint dri2_server_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync) { @@ -2620,6 +2799,7 @@ _eglBuiltInDriverDRI2(const char *args) dri2_drv->base.API.GetSyncValuesCHROMIUM = dri2_get_sync_values_chromium; dri2_drv->base.API.CreateSyncKHR = dri2_create_sync; dri2_drv->base.API.ClientWaitSyncKHR = dri2_client_wait_sync; + dri2_drv->base.API.SignalSyncKHR = dri2_signal_sync; dri2_drv->base.API.WaitSyncKHR = dri2_server_wait_sync; dri2_drv->base.API.DestroySyncKHR = dri2_destroy_sync; diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index 52ad92b182d..ef799398474 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -307,6 +307,8 @@ struct dri2_egl_image struct dri2_egl_sync { _EGLSync base; + mtx_t mutex; + cnd_t cond; int refcount; void *fence; }; diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 8886759011a..64ffe92be43 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -1469,9 +1469,24 @@ eglClientWaitSync(EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout) if (s->SyncStatus == EGL_SIGNALED_KHR) RETURN_EGL_EVAL(disp, EGL_CONDITION_SATISFIED_KHR); + /* if sync type is 
EGL_SYNC_REUSABLE_KHR, dpy should be + * unlocked here to allow other threads also to be able to + * go into waiting state. + */ + + if (s->Type == EGL_SYNC_REUSABLE_KHR) + _eglUnlockDisplay(dpy); + ret = drv->API.ClientWaitSyncKHR(drv, disp, s, flags, timeout); - RETURN_EGL_EVAL(disp, ret); + /* + * 'disp' is already unlocked for reusable sync type, + * so passing 'NULL' to bypass unlocking display. + */ + if (s->Type == EGL_SYNC_REUSABLE_KHR) + RETURN_EGL_EVAL(NULL, ret); + else + RETURN_EGL_EVAL(disp, ret); } diff --git a/src/egl/main/eglsync.c b/src/egl/main/eglsync.c index 999cb480c4b..33625e97ae3 100644 --- a/src/egl/main/eglsync.c +++ b/src/egl/main/eglsync.c @@ -152,7 +152,8 @@ _eglGetSyncAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, /* update the sync status */ if (sync->SyncStatus != EGL_SIGNALED_KHR && (sync->Type == EGL_SYNC_FENCE_KHR || - sync->Type == EGL_SYNC_CL_EVENT_KHR)) + sync->Type == EGL_SYNC_CL_EVENT_KHR || + sync->Type == EGL_SYNC_REUSABLE_KHR)) drv->API.ClientWaitSyncKHR(drv, dpy, sync, 0, 0); *value = sync->SyncStatus; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index cd9ee5434d3..a5f07236e83 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -163,7 +163,7 @@ aa_transform_decl(struct tgsi_transform_context *ctx, uint i; for (i = decl->Range.First; i <= decl->Range.Last; i++) { - aactx->samplersUsed |= 1 << i; + aactx->samplersUsed |= 1u << i; } } else if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { @@ -208,9 +208,11 @@ aa_transform_prolog(struct tgsi_transform_context *ctx) struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; uint i; + STATIC_ASSERT(sizeof(aactx->samplersUsed) * 8 >= PIPE_MAX_SAMPLERS); + /* find free sampler */ aactx->freeSampler = free_bit(aactx->samplersUsed); - if (aactx->freeSampler >= PIPE_MAX_SAMPLERS) + if (aactx->freeSampler < 0 || aactx->freeSampler 
>= PIPE_MAX_SAMPLERS) aactx->freeSampler = PIPE_MAX_SAMPLERS - 1; /* find two free temp regs */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index d90fb1d68df..c5ef16810a2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -464,7 +464,8 @@ scan_declaration(struct tgsi_shader_info *info, } } } else if (file == TGSI_FILE_SAMPLER) { - info->samplers_declared |= 1 << reg; + STATIC_ASSERT(sizeof(info->samplers_declared) * 8 >= PIPE_MAX_SAMPLERS); + info->samplers_declared |= 1u << reg; } else if (file == TGSI_FILE_SAMPLER_VIEW) { unsigned target = fulldecl->SamplerView.Resource; assert(target < TGSI_TEXTURE_UNKNOWN); diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index a73a1de2f0b..b1f3982fb4e 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -645,6 +645,8 @@ util_dump_framebuffer_state(FILE *stream, const struct pipe_framebuffer_state *s util_dump_member(stream, uint, state, width); util_dump_member(stream, uint, state, height); + util_dump_member(stream, uint, state, samples); + util_dump_member(stream, uint, state, layers); util_dump_member(stream, uint, state, nr_cbufs); util_dump_member_array(stream, ptr, state, cbufs); util_dump_member(stream, ptr, state, zsbuf); diff --git a/src/gallium/auxiliary/util/u_framebuffer.c b/src/gallium/auxiliary/util/u_framebuffer.c index 49b391d8162..f9b804673dc 100644 --- a/src/gallium/auxiliary/util/u_framebuffer.c +++ b/src/gallium/auxiliary/util/u_framebuffer.c @@ -55,6 +55,10 @@ util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst, dst->height != src->height) return FALSE; + if (dst->samples != src->samples || + dst->layers != src->layers) + return FALSE; + if (dst->nr_cbufs != src->nr_cbufs) { return FALSE; } @@ -85,6 +89,9 @@ util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, dst->width = 
src->width; dst->height = src->height; + dst->samples = src->samples; + dst->layers = src->layers; + for (i = 0; i < src->nr_cbufs; i++) pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); @@ -109,6 +116,7 @@ util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb) pipe_surface_reference(&fb->zsbuf, NULL); + fb->samples = fb->layers = 0; fb->width = fb->height = 0; fb->nr_cbufs = 0; } @@ -160,6 +168,14 @@ util_framebuffer_get_num_layers(const struct pipe_framebuffer_state *fb) { unsigned i, num_layers = 0; + /** + * In the case of ARB_framebuffer_no_attachment + * we obtain the number of layers directly from + * the framebuffer state. + */ + if (!(fb->nr_cbufs || fb->zsbuf)) + return fb->layers; + for (i = 0; i < fb->nr_cbufs; i++) { if (fb->cbufs[i]) { unsigned num = fb->cbufs[i]->u.tex.last_layer - @@ -184,6 +200,20 @@ util_framebuffer_get_num_samples(const struct pipe_framebuffer_state *fb) { unsigned i; + /** + * In the case of ARB_framebuffer_no_attachment + * we obtain the number of samples directly from + * the framebuffer state. + * + * NOTE: fb->samples may wind up as zero due to memset()'s on internal + * driver structures on their initialization and so we take the + * MAX here to ensure we have a valid number of samples. However, + * if samples is legitimately not getting set somewhere + * multi-sampling will evidently break. 
+ */ + if (!(fb->nr_cbufs || fb->zsbuf)) + return MAX2(fb->samples, 1); + for (i = 0; i < fb->nr_cbufs; i++) { if (fb->cbufs[i]) { return MAX2(1, fb->cbufs[i]->texture->nr_samples); diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c index bcbe2a25b25..3ae8923f953 100644 --- a/src/gallium/auxiliary/util/u_pstipple.c +++ b/src/gallium/auxiliary/util/u_pstipple.c @@ -204,7 +204,7 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, if (decl->Declaration.File == TGSI_FILE_SAMPLER) { uint i; for (i = decl->Range.First; i <= decl->Range.Last; i++) { - pctx->samplersUsed |= 1 << i; + pctx->samplersUsed |= 1u << i; } } else if (decl->Declaration.File == pctx->wincoordFile) { @@ -266,9 +266,11 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx) int texTemp; int sampIdx; + STATIC_ASSERT(sizeof(pctx->samplersUsed) * 8 >= PIPE_MAX_SAMPLERS); + /* find free texture sampler */ pctx->freeSampler = free_bit(pctx->samplersUsed); - if (pctx->freeSampler >= PIPE_MAX_SAMPLERS) + if (pctx->freeSampler < 0 || pctx->freeSampler >= PIPE_MAX_SAMPLERS) pctx->freeSampler = PIPE_MAX_SAMPLERS - 1; if (pctx->wincoordInput < 0) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 47a19de6ea9..824f580ed44 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -323,6 +323,14 @@ The integer capabilities: * ``PIPE_CAP_PCI_BUS``: Return the PCI bus number. * ``PIPE_CAP_PCI_DEVICE``: Return the PCI device number. * ``PIPE_CAP_PCI_FUNCTION``: Return the PCI function number. +* ``PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT``: + If non-zero, rendering to framebuffers with no surface attachments + is supported. The context->is_format_supported function will be expected + to be implemented with PIPE_FORMAT_NONE yielding the MSAA modes the hardware + supports. 
N.B., The maximum number of layers supported for rasterizing a + primitive on a layer is obtained from ``PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS`` + even though it can be larger than the number of layers supported by either + rendering or textures. .. _pipe_capf: diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index d47cb07f10b..707be17513b 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -255,6 +255,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; case PIPE_CAP_MAX_VIEWPORTS: diff --git a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c index 599872470fc..e29d1568256 100644 --- a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c +++ b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c @@ -243,7 +243,7 @@ static void print_instr_cat2(instr_t *instr) "?6?", }; - switch (cat2->opc) { + switch (_OPC(2, cat2->opc)) { case OPC_CMPS_F: case OPC_CMPS_U: case OPC_CMPS_S: @@ -274,7 +274,7 @@ static void print_instr_cat2(instr_t *instr) cat2->src1_abs, false); } - switch (cat2->opc) { + switch (_OPC(2, cat2->opc)) { case OPC_ABSNEG_F: case OPC_ABSNEG_S: case OPC_CLZ_B: @@ -382,34 +382,34 @@ static void print_instr_cat5(instr_t *instr) static const struct { bool src1, src2, samp, tex; } info[0x1f] = { - [OPC_ISAM] = { true, false, true, true, }, - [OPC_ISAML] = { true, true, true, true, }, - [OPC_ISAMM] = { true, false, true, true, }, - [OPC_SAM] = { true, false, true, true, }, - [OPC_SAMB] = { true, true, true, true, }, - [OPC_SAML] = { true, true, true, true, }, - [OPC_SAMGQ] = { true, false, true, true, }, - [OPC_GETLOD] = { true, false, true, true, }, - [OPC_CONV] = { true, true, true, true, }, - [OPC_CONVM] = { true, 
true, true, true, }, - [OPC_GETSIZE] = { true, false, false, true, }, - [OPC_GETBUF] = { false, false, false, true, }, - [OPC_GETPOS] = { true, false, false, true, }, - [OPC_GETINFO] = { false, false, false, true, }, - [OPC_DSX] = { true, false, false, false, }, - [OPC_DSY] = { true, false, false, false, }, - [OPC_GATHER4R] = { true, false, true, true, }, - [OPC_GATHER4G] = { true, false, true, true, }, - [OPC_GATHER4B] = { true, false, true, true, }, - [OPC_GATHER4A] = { true, false, true, true, }, - [OPC_SAMGP0] = { true, false, true, true, }, - [OPC_SAMGP1] = { true, false, true, true, }, - [OPC_SAMGP2] = { true, false, true, true, }, - [OPC_SAMGP3] = { true, false, true, true, }, - [OPC_DSXPP_1] = { true, false, false, false, }, - [OPC_DSYPP_1] = { true, false, false, false, }, - [OPC_RGETPOS] = { false, false, false, false, }, - [OPC_RGETINFO] = { false, false, false, false, }, + [opc_op(OPC_ISAM)] = { true, false, true, true, }, + [opc_op(OPC_ISAML)] = { true, true, true, true, }, + [opc_op(OPC_ISAMM)] = { true, false, true, true, }, + [opc_op(OPC_SAM)] = { true, false, true, true, }, + [opc_op(OPC_SAMB)] = { true, true, true, true, }, + [opc_op(OPC_SAML)] = { true, true, true, true, }, + [opc_op(OPC_SAMGQ)] = { true, false, true, true, }, + [opc_op(OPC_GETLOD)] = { true, false, true, true, }, + [opc_op(OPC_CONV)] = { true, true, true, true, }, + [opc_op(OPC_CONVM)] = { true, true, true, true, }, + [opc_op(OPC_GETSIZE)] = { true, false, false, true, }, + [opc_op(OPC_GETBUF)] = { false, false, false, true, }, + [opc_op(OPC_GETPOS)] = { true, false, false, true, }, + [opc_op(OPC_GETINFO)] = { false, false, false, true, }, + [opc_op(OPC_DSX)] = { true, false, false, false, }, + [opc_op(OPC_DSY)] = { true, false, false, false, }, + [opc_op(OPC_GATHER4R)] = { true, false, true, true, }, + [opc_op(OPC_GATHER4G)] = { true, false, true, true, }, + [opc_op(OPC_GATHER4B)] = { true, false, true, true, }, + [opc_op(OPC_GATHER4A)] = { true, false, true, true, }, + 
[opc_op(OPC_SAMGP0)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP1)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP2)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP3)] = { true, false, true, true, }, + [opc_op(OPC_DSXPP_1)] = { true, false, false, false, }, + [opc_op(OPC_DSYPP_1)] = { true, false, false, false, }, + [opc_op(OPC_RGETPOS)] = { false, false, false, false, }, + [opc_op(OPC_RGETINFO)] = { false, false, false, false, }, }; instr_cat5_t *cat5 = &instr->cat5; int i; @@ -423,7 +423,7 @@ static void print_instr_cat5(instr_t *instr) printf(" "); - switch (cat5->opc) { + switch (_OPC(5, cat5->opc)) { case OPC_DSXPP_1: case OPC_DSYPP_1: break; @@ -488,7 +488,7 @@ static void print_instr_cat6(instr_t *instr) memset(&src1, 0, sizeof(src1)); memset(&src2, 0, sizeof(src2)); - switch (cat6->opc) { + switch (_OPC(6, cat6->opc)) { case OPC_RESINFO: case OPC_RESFMT: dst.full = type_size(cat6->type) == 32; @@ -519,7 +519,7 @@ static void print_instr_cat6(instr_t *instr) break; } - switch (cat6->opc) { + switch (_OPC(6, cat6->opc)) { case OPC_PREFETCH: case OPC_RESINFO: break; @@ -545,7 +545,7 @@ static void print_instr_cat6(instr_t *instr) } printf(" "); - switch (cat6->opc) { + switch (_OPC(6, cat6->opc)) { case OPC_STG: sd = 'g'; break; @@ -636,7 +636,7 @@ static void print_instr_cat6(instr_t *instr) if (ss) printf("]"); - switch (cat6->opc) { + switch (_OPC(6, cat6->opc)) { case OPC_RESINFO: case OPC_RESFMT: break; @@ -656,7 +656,7 @@ static const struct opc_info { const char *name; void (*print)(instr_t *instr); } opcs[1 << (3+NOPC_BITS)] = { -#define OPC(cat, opc, name) [((cat) << NOPC_BITS) | (opc)] = { (cat), (opc), #name, print_instr_cat##cat } +#define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat } /* category 0: */ OPC(0, OPC_NOP, nop), OPC(0, OPC_BR, br), @@ -672,7 +672,7 @@ static const struct opc_info { OPC(0, OPC_FLOW_REV, flow_rev), /* category 1: */ - OPC(1, 0, ), + OPC(1, OPC_MOV, ), /* category 2: */ 
OPC(2, OPC_ADD_F, add.f), @@ -822,8 +822,8 @@ static const struct opc_info { #include "ir3.h" const char *ir3_instr_name(struct ir3_instruction *instr) { - if (instr->category == -1) return "??meta??"; - return opcs[(instr->category << NOPC_BITS) | instr->opc].name; + if (opc_cat(instr->opc) == -1) return "??meta??"; + return opcs[instr->opc].name; } static void print_instr(uint32_t *dwords, int level, int n) diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h index 1b1f1f0a797..87083fd1e81 100644 --- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h +++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h @@ -29,181 +29,189 @@ #include <stdint.h> #include <assert.h> +/* size of largest OPC field of all the instruction categories: */ +#define NOPC_BITS 6 + +#define _OPC(cat, opc) (((cat) << NOPC_BITS) | opc) + typedef enum { /* category 0: */ - OPC_NOP = 0, - OPC_BR = 1, - OPC_JUMP = 2, - OPC_CALL = 3, - OPC_RET = 4, - OPC_KILL = 5, - OPC_END = 6, - OPC_EMIT = 7, - OPC_CUT = 8, - OPC_CHMASK = 9, - OPC_CHSH = 10, - OPC_FLOW_REV = 11, + OPC_NOP = _OPC(0, 0), + OPC_BR = _OPC(0, 1), + OPC_JUMP = _OPC(0, 2), + OPC_CALL = _OPC(0, 3), + OPC_RET = _OPC(0, 4), + OPC_KILL = _OPC(0, 5), + OPC_END = _OPC(0, 6), + OPC_EMIT = _OPC(0, 7), + OPC_CUT = _OPC(0, 8), + OPC_CHMASK = _OPC(0, 9), + OPC_CHSH = _OPC(0, 10), + OPC_FLOW_REV = _OPC(0, 11), /* category 1: */ - /* no opc.. 
all category 1 are variants of mov */ + OPC_MOV = _OPC(1, 0), /* category 2: */ - OPC_ADD_F = 0, - OPC_MIN_F = 1, - OPC_MAX_F = 2, - OPC_MUL_F = 3, - OPC_SIGN_F = 4, - OPC_CMPS_F = 5, - OPC_ABSNEG_F = 6, - OPC_CMPV_F = 7, + OPC_ADD_F = _OPC(2, 0), + OPC_MIN_F = _OPC(2, 1), + OPC_MAX_F = _OPC(2, 2), + OPC_MUL_F = _OPC(2, 3), + OPC_SIGN_F = _OPC(2, 4), + OPC_CMPS_F = _OPC(2, 5), + OPC_ABSNEG_F = _OPC(2, 6), + OPC_CMPV_F = _OPC(2, 7), /* 8 - invalid */ - OPC_FLOOR_F = 9, - OPC_CEIL_F = 10, - OPC_RNDNE_F = 11, - OPC_RNDAZ_F = 12, - OPC_TRUNC_F = 13, + OPC_FLOOR_F = _OPC(2, 9), + OPC_CEIL_F = _OPC(2, 10), + OPC_RNDNE_F = _OPC(2, 11), + OPC_RNDAZ_F = _OPC(2, 12), + OPC_TRUNC_F = _OPC(2, 13), /* 14-15 - invalid */ - OPC_ADD_U = 16, - OPC_ADD_S = 17, - OPC_SUB_U = 18, - OPC_SUB_S = 19, - OPC_CMPS_U = 20, - OPC_CMPS_S = 21, - OPC_MIN_U = 22, - OPC_MIN_S = 23, - OPC_MAX_U = 24, - OPC_MAX_S = 25, - OPC_ABSNEG_S = 26, + OPC_ADD_U = _OPC(2, 16), + OPC_ADD_S = _OPC(2, 17), + OPC_SUB_U = _OPC(2, 18), + OPC_SUB_S = _OPC(2, 19), + OPC_CMPS_U = _OPC(2, 20), + OPC_CMPS_S = _OPC(2, 21), + OPC_MIN_U = _OPC(2, 22), + OPC_MIN_S = _OPC(2, 23), + OPC_MAX_U = _OPC(2, 24), + OPC_MAX_S = _OPC(2, 25), + OPC_ABSNEG_S = _OPC(2, 26), /* 27 - invalid */ - OPC_AND_B = 28, - OPC_OR_B = 29, - OPC_NOT_B = 30, - OPC_XOR_B = 31, + OPC_AND_B = _OPC(2, 28), + OPC_OR_B = _OPC(2, 29), + OPC_NOT_B = _OPC(2, 30), + OPC_XOR_B = _OPC(2, 31), /* 32 - invalid */ - OPC_CMPV_U = 33, - OPC_CMPV_S = 34, + OPC_CMPV_U = _OPC(2, 33), + OPC_CMPV_S = _OPC(2, 34), /* 35-47 - invalid */ - OPC_MUL_U = 48, - OPC_MUL_S = 49, - OPC_MULL_U = 50, - OPC_BFREV_B = 51, - OPC_CLZ_S = 52, - OPC_CLZ_B = 53, - OPC_SHL_B = 54, - OPC_SHR_B = 55, - OPC_ASHR_B = 56, - OPC_BARY_F = 57, - OPC_MGEN_B = 58, - OPC_GETBIT_B = 59, - OPC_SETRM = 60, - OPC_CBITS_B = 61, - OPC_SHB = 62, - OPC_MSAD = 63, + OPC_MUL_U = _OPC(2, 48), + OPC_MUL_S = _OPC(2, 49), + OPC_MULL_U = _OPC(2, 50), + OPC_BFREV_B = _OPC(2, 51), + OPC_CLZ_S = _OPC(2, 52), + OPC_CLZ_B 
= _OPC(2, 53), + OPC_SHL_B = _OPC(2, 54), + OPC_SHR_B = _OPC(2, 55), + OPC_ASHR_B = _OPC(2, 56), + OPC_BARY_F = _OPC(2, 57), + OPC_MGEN_B = _OPC(2, 58), + OPC_GETBIT_B = _OPC(2, 59), + OPC_SETRM = _OPC(2, 60), + OPC_CBITS_B = _OPC(2, 61), + OPC_SHB = _OPC(2, 62), + OPC_MSAD = _OPC(2, 63), /* category 3: */ - OPC_MAD_U16 = 0, - OPC_MADSH_U16 = 1, - OPC_MAD_S16 = 2, - OPC_MADSH_M16 = 3, /* should this be .s16? */ - OPC_MAD_U24 = 4, - OPC_MAD_S24 = 5, - OPC_MAD_F16 = 6, - OPC_MAD_F32 = 7, - OPC_SEL_B16 = 8, - OPC_SEL_B32 = 9, - OPC_SEL_S16 = 10, - OPC_SEL_S32 = 11, - OPC_SEL_F16 = 12, - OPC_SEL_F32 = 13, - OPC_SAD_S16 = 14, - OPC_SAD_S32 = 15, + OPC_MAD_U16 = _OPC(3, 0), + OPC_MADSH_U16 = _OPC(3, 1), + OPC_MAD_S16 = _OPC(3, 2), + OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */ + OPC_MAD_U24 = _OPC(3, 4), + OPC_MAD_S24 = _OPC(3, 5), + OPC_MAD_F16 = _OPC(3, 6), + OPC_MAD_F32 = _OPC(3, 7), + OPC_SEL_B16 = _OPC(3, 8), + OPC_SEL_B32 = _OPC(3, 9), + OPC_SEL_S16 = _OPC(3, 10), + OPC_SEL_S32 = _OPC(3, 11), + OPC_SEL_F16 = _OPC(3, 12), + OPC_SEL_F32 = _OPC(3, 13), + OPC_SAD_S16 = _OPC(3, 14), + OPC_SAD_S32 = _OPC(3, 15), /* category 4: */ - OPC_RCP = 0, - OPC_RSQ = 1, - OPC_LOG2 = 2, - OPC_EXP2 = 3, - OPC_SIN = 4, - OPC_COS = 5, - OPC_SQRT = 6, + OPC_RCP = _OPC(4, 0), + OPC_RSQ = _OPC(4, 1), + OPC_LOG2 = _OPC(4, 2), + OPC_EXP2 = _OPC(4, 3), + OPC_SIN = _OPC(4, 4), + OPC_COS = _OPC(4, 5), + OPC_SQRT = _OPC(4, 6), // 7-63 - invalid /* category 5: */ - OPC_ISAM = 0, - OPC_ISAML = 1, - OPC_ISAMM = 2, - OPC_SAM = 3, - OPC_SAMB = 4, - OPC_SAML = 5, - OPC_SAMGQ = 6, - OPC_GETLOD = 7, - OPC_CONV = 8, - OPC_CONVM = 9, - OPC_GETSIZE = 10, - OPC_GETBUF = 11, - OPC_GETPOS = 12, - OPC_GETINFO = 13, - OPC_DSX = 14, - OPC_DSY = 15, - OPC_GATHER4R = 16, - OPC_GATHER4G = 17, - OPC_GATHER4B = 18, - OPC_GATHER4A = 19, - OPC_SAMGP0 = 20, - OPC_SAMGP1 = 21, - OPC_SAMGP2 = 22, - OPC_SAMGP3 = 23, - OPC_DSXPP_1 = 24, - OPC_DSYPP_1 = 25, - OPC_RGETPOS = 26, - OPC_RGETINFO = 27, + OPC_ISAM = 
_OPC(5, 0), + OPC_ISAML = _OPC(5, 1), + OPC_ISAMM = _OPC(5, 2), + OPC_SAM = _OPC(5, 3), + OPC_SAMB = _OPC(5, 4), + OPC_SAML = _OPC(5, 5), + OPC_SAMGQ = _OPC(5, 6), + OPC_GETLOD = _OPC(5, 7), + OPC_CONV = _OPC(5, 8), + OPC_CONVM = _OPC(5, 9), + OPC_GETSIZE = _OPC(5, 10), + OPC_GETBUF = _OPC(5, 11), + OPC_GETPOS = _OPC(5, 12), + OPC_GETINFO = _OPC(5, 13), + OPC_DSX = _OPC(5, 14), + OPC_DSY = _OPC(5, 15), + OPC_GATHER4R = _OPC(5, 16), + OPC_GATHER4G = _OPC(5, 17), + OPC_GATHER4B = _OPC(5, 18), + OPC_GATHER4A = _OPC(5, 19), + OPC_SAMGP0 = _OPC(5, 20), + OPC_SAMGP1 = _OPC(5, 21), + OPC_SAMGP2 = _OPC(5, 22), + OPC_SAMGP3 = _OPC(5, 23), + OPC_DSXPP_1 = _OPC(5, 24), + OPC_DSYPP_1 = _OPC(5, 25), + OPC_RGETPOS = _OPC(5, 26), + OPC_RGETINFO = _OPC(5, 27), /* category 6: */ - OPC_LDG = 0, /* load-global */ - OPC_LDL = 1, - OPC_LDP = 2, - OPC_STG = 3, /* store-global */ - OPC_STL = 4, - OPC_STP = 5, - OPC_STI = 6, - OPC_G2L = 7, - OPC_L2G = 8, - OPC_PREFETCH = 9, - OPC_LDLW = 10, - OPC_STLW = 11, - OPC_RESFMT = 14, - OPC_RESINFO = 15, - OPC_ATOMIC_ADD = 16, - OPC_ATOMIC_SUB = 17, - OPC_ATOMIC_XCHG = 18, - OPC_ATOMIC_INC = 19, - OPC_ATOMIC_DEC = 20, - OPC_ATOMIC_CMPXCHG = 21, - OPC_ATOMIC_MIN = 22, - OPC_ATOMIC_MAX = 23, - OPC_ATOMIC_AND = 24, - OPC_ATOMIC_OR = 25, - OPC_ATOMIC_XOR = 26, - OPC_LDGB_TYPED_4D = 27, - OPC_STGB_4D_4 = 28, - OPC_STIB = 29, - OPC_LDC_4 = 30, - OPC_LDLV = 31, + OPC_LDG = _OPC(6, 0), /* load-global */ + OPC_LDL = _OPC(6, 1), + OPC_LDP = _OPC(6, 2), + OPC_STG = _OPC(6, 3), /* store-global */ + OPC_STL = _OPC(6, 4), + OPC_STP = _OPC(6, 5), + OPC_STI = _OPC(6, 6), + OPC_G2L = _OPC(6, 7), + OPC_L2G = _OPC(6, 8), + OPC_PREFETCH = _OPC(6, 9), + OPC_LDLW = _OPC(6, 10), + OPC_STLW = _OPC(6, 11), + OPC_RESFMT = _OPC(6, 14), + OPC_RESINFO = _OPC(6, 15), + OPC_ATOMIC_ADD = _OPC(6, 16), + OPC_ATOMIC_SUB = _OPC(6, 17), + OPC_ATOMIC_XCHG = _OPC(6, 18), + OPC_ATOMIC_INC = _OPC(6, 19), + OPC_ATOMIC_DEC = _OPC(6, 20), + OPC_ATOMIC_CMPXCHG = _OPC(6, 21), + OPC_ATOMIC_MIN 
= _OPC(6, 22), + OPC_ATOMIC_MAX = _OPC(6, 23), + OPC_ATOMIC_AND = _OPC(6, 24), + OPC_ATOMIC_OR = _OPC(6, 25), + OPC_ATOMIC_XOR = _OPC(6, 26), + OPC_LDGB_TYPED_4D = _OPC(6, 27), + OPC_STGB_4D_4 = _OPC(6, 28), + OPC_STIB = _OPC(6, 29), + OPC_LDC_4 = _OPC(6, 30), + OPC_LDLV = _OPC(6, 31), /* meta instructions (category -1): */ /* placeholder instr to mark shader inputs: */ - OPC_META_INPUT = 0, - OPC_META_PHI = 1, + OPC_META_INPUT = _OPC(-1, 0), + OPC_META_PHI = _OPC(-1, 1), /* The "fan-in" and "fan-out" instructions are used for keeping * track of instructions that write to multiple dst registers * (fan-out) like texture sample instructions, or read multiple * consecutive scalar registers (fan-in) (bary.f, texture samp) */ - OPC_META_FO = 2, - OPC_META_FI = 3, + OPC_META_FO = _OPC(-1, 2), + OPC_META_FI = _OPC(-1, 3), } opc_t; +#define opc_cat(opc) ((int)((opc) >> NOPC_BITS)) +#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1))) + typedef enum { TYPE_F16 = 0, TYPE_F32 = 1, @@ -472,7 +480,7 @@ typedef struct PACKED { static inline bool instr_cat3_full(instr_cat3_t *cat3) { - switch (cat3->opc) { + switch (_OPC(3, cat3->opc)) { case OPC_MAD_F16: case OPC_MAD_U16: case OPC_MAD_S16: diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index 7d89142d7a1..3de8fdc11b3 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -612,7 +612,7 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - int ret = emit[instr->category](instr, dwords, info); + int ret = emit[opc_cat(instr->opc)](instr, dwords, info); if (ret) goto fail; info->instrs_count += 1 + instr->repeat; @@ -683,23 +683,21 @@ static struct ir3_instruction *instr_create(struct ir3_block *block, int nreg) } struct ir3_instruction * ir3_instr_create2(struct 
ir3_block *block, - int category, opc_t opc, int nreg) + opc_t opc, int nreg) { struct ir3_instruction *instr = instr_create(block, nreg); instr->block = block; - instr->category = category; instr->opc = opc; insert_instr(block, instr); return instr; } -struct ir3_instruction * ir3_instr_create(struct ir3_block *block, - int category, opc_t opc) +struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc) { /* NOTE: we could be slightly more clever, at least for non-meta, * and choose # of regs based on category. */ - return ir3_instr_create2(block, category, opc, 4); + return ir3_instr_create2(block, opc, 4); } struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 1a109d880e6..3859f6a39f3 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -130,7 +130,6 @@ struct ir3_register { struct ir3_instruction { struct ir3_block *block; - int category; opc_t opc; enum { /* (sy) flag is set on first instruction, and after sample @@ -435,6 +434,16 @@ struct ir3_block { #endif }; +static inline uint32_t +block_id(struct ir3_block *block) +{ +#ifdef DEBUG + return block->serialno; +#else + return (uint32_t)(unsigned long)block; +#endif +} + struct ir3 * ir3_create(struct ir3_compiler *compiler, unsigned nin, unsigned nout); void ir3_destroy(struct ir3 *shader); @@ -444,10 +453,9 @@ void * ir3_alloc(struct ir3 *shader, int sz); struct ir3_block * ir3_block_create(struct ir3 *shader); -struct ir3_instruction * ir3_instr_create(struct ir3_block *block, - int category, opc_t opc); +struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc); struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, - int category, opc_t opc, int nreg); + opc_t opc, int nreg); struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr); const char *ir3_instr_name(struct 
ir3_instruction *instr); @@ -508,17 +516,17 @@ static inline uint32_t reg_comp(struct ir3_register *reg) static inline bool is_flow(struct ir3_instruction *instr) { - return (instr->category == 0); + return (opc_cat(instr->opc) == 0); } static inline bool is_kill(struct ir3_instruction *instr) { - return is_flow(instr) && (instr->opc == OPC_KILL); + return instr->opc == OPC_KILL; } static inline bool is_nop(struct ir3_instruction *instr) { - return is_flow(instr) && (instr->opc == OPC_NOP); + return instr->opc == OPC_NOP; } /* Is it a non-transformative (ie. not type changing) mov? This can @@ -538,75 +546,71 @@ static inline bool is_same_type_mov(struct ir3_instruction *instr) if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY)) return false; - if ((instr->category == 1) && - (instr->cat1.src_type == instr->cat1.dst_type)) - return true; - if ((instr->category == 2) && ((instr->opc == OPC_ABSNEG_F) || - (instr->opc == OPC_ABSNEG_S))) + switch (instr->opc) { + case OPC_MOV: + return instr->cat1.src_type == instr->cat1.dst_type; + case OPC_ABSNEG_F: + case OPC_ABSNEG_S: return true; - return false; + default: + return false; + } } static inline bool is_alu(struct ir3_instruction *instr) { - return (1 <= instr->category) && (instr->category <= 3); + return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3); } static inline bool is_sfu(struct ir3_instruction *instr) { - return (instr->category == 4); + return (opc_cat(instr->opc) == 4); } static inline bool is_tex(struct ir3_instruction *instr) { - return (instr->category == 5); + return (opc_cat(instr->opc) == 5); } static inline bool is_mem(struct ir3_instruction *instr) { - return (instr->category == 6); + return (opc_cat(instr->opc) == 6); } static inline bool is_store(struct ir3_instruction *instr) { - if (is_mem(instr)) { - /* these instructions, the "destination" register is - * actually a source, the address to store to. 
- */ - switch (instr->opc) { - case OPC_STG: - case OPC_STP: - case OPC_STL: - case OPC_STLW: - case OPC_L2G: - case OPC_G2L: - return true; - default: - break; - } + /* these instructions, the "destination" register is + * actually a source, the address to store to. + */ + switch (instr->opc) { + case OPC_STG: + case OPC_STP: + case OPC_STL: + case OPC_STLW: + case OPC_L2G: + case OPC_G2L: + return true; + default: + return false; } - return false; } static inline bool is_load(struct ir3_instruction *instr) { - if (is_mem(instr)) { - switch (instr->opc) { - case OPC_LDG: - case OPC_LDL: - case OPC_LDP: - case OPC_L2G: - case OPC_LDLW: - case OPC_LDC_4: - case OPC_LDLV: + switch (instr->opc) { + case OPC_LDG: + case OPC_LDL: + case OPC_LDP: + case OPC_L2G: + case OPC_LDLW: + case OPC_LDC_4: + case OPC_LDLV: /* probably some others too.. */ - return true; - default: - break; - } + return true; + default: + return false; } - return false; } static inline bool is_input(struct ir3_instruction *instr) @@ -615,9 +619,25 @@ static inline bool is_input(struct ir3_instruction *instr) * interpolation.. fortunately inloc is the first src * register in either case */ - if (is_mem(instr) && (instr->opc == OPC_LDLV)) + switch (instr->opc) { + case OPC_LDLV: + case OPC_BARY_F: + return true; + default: + return false; + } +} + +static inline bool is_bool(struct ir3_instruction *instr) +{ + switch (instr->opc) { + case OPC_CMPS_F: + case OPC_CMPS_S: + case OPC_CMPS_U: return true; - return (instr->category == 2) && (instr->opc == OPC_BARY_F); + default: + return false; + } } static inline bool is_meta(struct ir3_instruction *instr) @@ -626,7 +646,7 @@ static inline bool is_meta(struct ir3_instruction *instr) * might actually contribute some instructions to the final * result? 
*/ - return (instr->category == -1); + return (opc_cat(instr->opc) == -1); } static inline bool writes_addr(struct ir3_instruction *instr) @@ -901,8 +921,7 @@ void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary); static inline struct ir3_instruction * ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type) { - struct ir3_instruction *instr = - ir3_instr_create(block, 1, 0); + struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); ir3_reg_create(instr, 0, 0); /* dst */ if (src->regs[0]->flags & IR3_REG_ARRAY) { struct ir3_register *src_reg = @@ -922,8 +941,7 @@ static inline struct ir3_instruction * ir3_COV(struct ir3_block *block, struct ir3_instruction *src, type_t src_type, type_t dst_type) { - struct ir3_instruction *instr = - ir3_instr_create(block, 1, 0); + struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); ir3_reg_create(instr, 0, 0); /* dst */ ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; instr->cat1.src_type = src_type; @@ -935,45 +953,45 @@ ir3_COV(struct ir3_block *block, struct ir3_instruction *src, static inline struct ir3_instruction * ir3_NOP(struct ir3_block *block) { - return ir3_instr_create(block, 0, OPC_NOP); + return ir3_instr_create(block, OPC_NOP); } -#define INSTR0(CAT, name) \ +#define INSTR0(name) \ static inline struct ir3_instruction * \ ir3_##name(struct ir3_block *block) \ { \ struct ir3_instruction *instr = \ - ir3_instr_create(block, CAT, OPC_##name); \ + ir3_instr_create(block, OPC_##name); \ return instr; \ } -#define INSTR1(CAT, name) \ +#define INSTR1(name) \ static inline struct ir3_instruction * \ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *a, unsigned aflags) \ { \ struct ir3_instruction *instr = \ - ir3_instr_create(block, CAT, OPC_##name); \ + ir3_instr_create(block, OPC_##name); \ ir3_reg_create(instr, 0, 0); /* dst */ \ ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ return instr; \ } -#define INSTR2(CAT, name) \ +#define 
INSTR2(name) \ static inline struct ir3_instruction * \ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *a, unsigned aflags, \ struct ir3_instruction *b, unsigned bflags) \ { \ struct ir3_instruction *instr = \ - ir3_instr_create(block, CAT, OPC_##name); \ + ir3_instr_create(block, OPC_##name); \ ir3_reg_create(instr, 0, 0); /* dst */ \ ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \ return instr; \ } -#define INSTR3(CAT, name) \ +#define INSTR3(name) \ static inline struct ir3_instruction * \ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *a, unsigned aflags, \ @@ -981,7 +999,7 @@ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *c, unsigned cflags) \ { \ struct ir3_instruction *instr = \ - ir3_instr_create(block, CAT, OPC_##name); \ + ir3_instr_create(block, OPC_##name); \ ir3_reg_create(instr, 0, 0); /* dst */ \ ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \ @@ -990,89 +1008,89 @@ ir3_##name(struct ir3_block *block, \ } /* cat0 instructions: */ -INSTR0(0, BR); -INSTR0(0, JUMP); -INSTR1(0, KILL); -INSTR0(0, END); +INSTR0(BR); +INSTR0(JUMP); +INSTR1(KILL); +INSTR0(END); /* cat2 instructions, most 2 src but some 1 src: */ -INSTR2(2, ADD_F) -INSTR2(2, MIN_F) -INSTR2(2, MAX_F) -INSTR2(2, MUL_F) -INSTR1(2, SIGN_F) -INSTR2(2, CMPS_F) -INSTR1(2, ABSNEG_F) -INSTR2(2, CMPV_F) -INSTR1(2, FLOOR_F) -INSTR1(2, CEIL_F) -INSTR1(2, RNDNE_F) -INSTR1(2, RNDAZ_F) -INSTR1(2, TRUNC_F) -INSTR2(2, ADD_U) -INSTR2(2, ADD_S) -INSTR2(2, SUB_U) -INSTR2(2, SUB_S) -INSTR2(2, CMPS_U) -INSTR2(2, CMPS_S) -INSTR2(2, MIN_U) -INSTR2(2, MIN_S) -INSTR2(2, MAX_U) -INSTR2(2, MAX_S) -INSTR1(2, ABSNEG_S) -INSTR2(2, AND_B) -INSTR2(2, OR_B) -INSTR1(2, NOT_B) -INSTR2(2, XOR_B) -INSTR2(2, CMPV_U) -INSTR2(2, CMPV_S) -INSTR2(2, MUL_U) -INSTR2(2, MUL_S) -INSTR2(2, MULL_U) -INSTR1(2, BFREV_B) -INSTR1(2, CLZ_S) -INSTR1(2, CLZ_B) 
-INSTR2(2, SHL_B) -INSTR2(2, SHR_B) -INSTR2(2, ASHR_B) -INSTR2(2, BARY_F) -INSTR2(2, MGEN_B) -INSTR2(2, GETBIT_B) -INSTR1(2, SETRM) -INSTR1(2, CBITS_B) -INSTR2(2, SHB) -INSTR2(2, MSAD) +INSTR2(ADD_F) +INSTR2(MIN_F) +INSTR2(MAX_F) +INSTR2(MUL_F) +INSTR1(SIGN_F) +INSTR2(CMPS_F) +INSTR1(ABSNEG_F) +INSTR2(CMPV_F) +INSTR1(FLOOR_F) +INSTR1(CEIL_F) +INSTR1(RNDNE_F) +INSTR1(RNDAZ_F) +INSTR1(TRUNC_F) +INSTR2(ADD_U) +INSTR2(ADD_S) +INSTR2(SUB_U) +INSTR2(SUB_S) +INSTR2(CMPS_U) +INSTR2(CMPS_S) +INSTR2(MIN_U) +INSTR2(MIN_S) +INSTR2(MAX_U) +INSTR2(MAX_S) +INSTR1(ABSNEG_S) +INSTR2(AND_B) +INSTR2(OR_B) +INSTR1(NOT_B) +INSTR2(XOR_B) +INSTR2(CMPV_U) +INSTR2(CMPV_S) +INSTR2(MUL_U) +INSTR2(MUL_S) +INSTR2(MULL_U) +INSTR1(BFREV_B) +INSTR1(CLZ_S) +INSTR1(CLZ_B) +INSTR2(SHL_B) +INSTR2(SHR_B) +INSTR2(ASHR_B) +INSTR2(BARY_F) +INSTR2(MGEN_B) +INSTR2(GETBIT_B) +INSTR1(SETRM) +INSTR1(CBITS_B) +INSTR2(SHB) +INSTR2(MSAD) /* cat3 instructions: */ -INSTR3(3, MAD_U16) -INSTR3(3, MADSH_U16) -INSTR3(3, MAD_S16) -INSTR3(3, MADSH_M16) -INSTR3(3, MAD_U24) -INSTR3(3, MAD_S24) -INSTR3(3, MAD_F16) -INSTR3(3, MAD_F32) -INSTR3(3, SEL_B16) -INSTR3(3, SEL_B32) -INSTR3(3, SEL_S16) -INSTR3(3, SEL_S32) -INSTR3(3, SEL_F16) -INSTR3(3, SEL_F32) -INSTR3(3, SAD_S16) -INSTR3(3, SAD_S32) +INSTR3(MAD_U16) +INSTR3(MADSH_U16) +INSTR3(MAD_S16) +INSTR3(MADSH_M16) +INSTR3(MAD_U24) +INSTR3(MAD_S24) +INSTR3(MAD_F16) +INSTR3(MAD_F32) +INSTR3(SEL_B16) +INSTR3(SEL_B32) +INSTR3(SEL_S16) +INSTR3(SEL_S32) +INSTR3(SEL_F16) +INSTR3(SEL_F32) +INSTR3(SAD_S16) +INSTR3(SAD_S32) /* cat4 instructions: */ -INSTR1(4, RCP) -INSTR1(4, RSQ) -INSTR1(4, LOG2) -INSTR1(4, EXP2) -INSTR1(4, SIN) -INSTR1(4, COS) -INSTR1(4, SQRT) +INSTR1(RCP) +INSTR1(RSQ) +INSTR1(LOG2) +INSTR1(EXP2) +INSTR1(SIN) +INSTR1(COS) +INSTR1(SQRT) /* cat5 instructions: */ -INSTR1(5, DSX) -INSTR1(5, DSY) +INSTR1(DSX) +INSTR1(DSY) static inline struct ir3_instruction * ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, @@ -1082,7 +1100,7 @@ ir3_SAM(struct ir3_block *block, 
opc_t opc, type_t type, struct ir3_instruction *sam; struct ir3_register *reg; - sam = ir3_instr_create(block, 5, opc); + sam = ir3_instr_create(block, opc); sam->flags |= flags; ir3_reg_create(sam, 0, 0)->wrmask = wrmask; if (src0) { @@ -1103,9 +1121,9 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, } /* cat6 instructions: */ -INSTR2(6, LDLV) -INSTR2(6, LDG) -INSTR3(6, STG) +INSTR2(LDLV) +INSTR2(LDG) +INSTR3(STG) /* ************************************************************************* */ /* split this out or find some helper to use.. like main/bitset.h.. */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 3d656d4a34d..245b61f31e5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -286,7 +286,7 @@ create_immed(struct ir3_block *block, uint32_t val) { struct ir3_instruction *mov; - mov = ir3_instr_create(block, 1, 0); + mov = ir3_instr_create(block, OPC_MOV); mov->cat1.src_type = TYPE_U32; mov->cat1.dst_type = TYPE_U32; ir3_reg_create(mov, 0, 0); @@ -366,7 +366,7 @@ create_uniform(struct ir3_compile *ctx, unsigned n) { struct ir3_instruction *mov; - mov = ir3_instr_create(ctx->block, 1, 0); + mov = ir3_instr_create(ctx->block, OPC_MOV); /* TODO get types right? 
*/ mov->cat1.src_type = TYPE_F32; mov->cat1.dst_type = TYPE_F32; @@ -382,7 +382,7 @@ create_uniform_indirect(struct ir3_compile *ctx, int n, { struct ir3_instruction *mov; - mov = ir3_instr_create(ctx->block, 1, 0); + mov = ir3_instr_create(ctx->block, OPC_MOV); mov->cat1.src_type = TYPE_U32; mov->cat1.dst_type = TYPE_U32; ir3_reg_create(mov, 0, 0); @@ -402,7 +402,7 @@ create_collect(struct ir3_block *block, struct ir3_instruction **arr, if (arrsz == 0) return NULL; - collect = ir3_instr_create2(block, -1, OPC_META_FI, 1 + arrsz); + collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz); ir3_reg_create(collect, 0, 0); /* dst */ for (unsigned i = 0; i < arrsz; i++) ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = arr[i]; @@ -418,7 +418,7 @@ create_indirect_load(struct ir3_compile *ctx, unsigned arrsz, int n, struct ir3_instruction *mov; struct ir3_register *src; - mov = ir3_instr_create(block, 1, 0); + mov = ir3_instr_create(block, OPC_MOV); mov->cat1.src_type = TYPE_U32; mov->cat1.dst_type = TYPE_U32; ir3_reg_create(mov, 0, 0); @@ -441,7 +441,7 @@ create_var_load(struct ir3_compile *ctx, struct ir3_array *arr, int n, struct ir3_instruction *mov; struct ir3_register *src; - mov = ir3_instr_create(block, 1, 0); + mov = ir3_instr_create(block, OPC_MOV); mov->cat1.src_type = TYPE_U32; mov->cat1.dst_type = TYPE_U32; ir3_reg_create(mov, 0, 0); @@ -469,7 +469,7 @@ create_var_store(struct ir3_compile *ctx, struct ir3_array *arr, int n, struct ir3_instruction *mov; struct ir3_register *dst; - mov = ir3_instr_create(block, 1, 0); + mov = ir3_instr_create(block, OPC_MOV); mov->cat1.src_type = TYPE_U32; mov->cat1.dst_type = TYPE_U32; dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY | @@ -492,7 +492,7 @@ create_input(struct ir3_block *block, unsigned n) { struct ir3_instruction *in; - in = ir3_instr_create(block, -1, OPC_META_INPUT); + in = ir3_instr_create(block, OPC_META_INPUT); in->inout.block = block; ir3_reg_create(in, n, 0); @@ -617,8 +617,7 @@ split_dest(struct ir3_block 
*block, struct ir3_instruction **dst, { struct ir3_instruction *prev = NULL; for (int i = 0, j = 0; i < n; i++) { - struct ir3_instruction *split = - ir3_instr_create(block, -1, OPC_META_FO); + struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO); ir3_reg_create(split, 0, IR3_REG_SSA); ir3_reg_create(split, 0, IR3_REG_SSA)->instr = src; split->fo.off = i; @@ -1631,7 +1630,7 @@ emit_phi(struct ir3_compile *ctx, nir_phi_instr *nphi) dst = get_dst(ctx, &nphi->dest, 1); - phi = ir3_instr_create2(ctx->block, -1, OPC_META_PHI, + phi = ir3_instr_create2(ctx->block, OPC_META_PHI, 1 + exec_list_length(&nphi->srcs)); ir3_reg_create(phi, 0, 0); /* dst */ phi->phi.nphi = nphi; @@ -1651,7 +1650,7 @@ resolve_phis(struct ir3_compile *ctx, struct ir3_block *block) nir_phi_instr *nphi; /* phi's only come at start of block: */ - if (!(is_meta(instr) && (instr->opc == OPC_META_PHI))) + if (instr->opc != OPC_META_PHI) break; if (!instr->phi.nphi) @@ -1662,6 +1661,16 @@ resolve_phis(struct ir3_compile *ctx, struct ir3_block *block) foreach_list_typed(nir_phi_src, nsrc, node, &nphi->srcs) { struct ir3_instruction *src = get_src(ctx, &nsrc->src)[0]; + + /* NOTE: src might not be in the same block as it comes from + * according to the phi.. but in the end the backend assumes + * it will be able to assign the same register to each (which + * only works if it is assigned in the src block), so insert + * an extra mov to make sure the phi src is assigned in the + * block it comes from: + */ + src = ir3_MOV(get_block(ctx, nsrc->pred), src, TYPE_U32); + ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; } } @@ -2144,7 +2153,7 @@ emit_instructions(struct ir3_compile *ctx) if (ctx->so->type == SHADER_FRAGMENT) { // TODO maybe a helper for fi since we need it a few places.. 
struct ir3_instruction *instr; - instr = ir3_instr_create(ctx->block, -1, OPC_META_FI); + instr = ir3_instr_create(ctx->block, OPC_META_FI); ir3_reg_create(instr, 0, 0); ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */ ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */ @@ -2323,12 +2332,12 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, * in which case we need to propagate the half-reg flag * up to the definer so that RA sees it: */ - if (is_meta(out) && (out->opc == OPC_META_FO)) { + if (out->opc == OPC_META_FO) { out = out->regs[1]->instr; out->regs[0]->flags |= IR3_REG_HALF; } - if (out->category == 1) { + if (out->opc == OPC_MOV) { out->cat1.dst_type = half_type(out->cat1.dst_type); } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 1cc211a7663..6037becf22f 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -58,14 +58,14 @@ static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags) return false; /* TODO: remove this hack: */ - if (is_meta(src_instr) && (src_instr->opc == OPC_META_FO)) + if (src_instr->opc == OPC_META_FO) return false; /* TODO: we currently don't handle left/right neighbors * very well when inserting parallel-copies into phi.. * to avoid problems don't eliminate a mov coming out * of phi.. 
*/ - if (is_meta(src_instr) && (src_instr->opc == OPC_META_PHI)) + if (src_instr->opc == OPC_META_PHI) return false; return true; } @@ -96,7 +96,7 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, return false; /* clear flags that are 'ok' */ - switch (instr->category) { + switch (opc_cat(instr->opc)) { case 1: valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV; if (flags & ~valid_flags) @@ -111,6 +111,19 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, valid_flags = IR3_REG_IMMED; if (flags & ~valid_flags) return false; + + if (flags & IR3_REG_IMMED) { + /* doesn't seem like we can have immediate src for store + * instructions: + * + * TODO this restriction could also apply to load instructions, + * but for load instructions this arg is the address (and not + * really sure any good way to test a hard-coded immed addr src) + */ + if (is_store(instr) && (n == 1)) + return false; + } + break; case 2: valid_flags = ir3_cat2_absneg(instr->opc) | @@ -176,8 +189,10 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, /* propagate register flags from src to dst.. negates need special * handling to cancel each other out. */ -static void combine_flags(unsigned *dstflags, unsigned srcflags) +static void combine_flags(unsigned *dstflags, struct ir3_instruction *src) { + unsigned srcflags = src->regs[1]->flags; + /* if what we are combining into already has (abs) flags, * we can drop (neg) from src: */ @@ -203,15 +218,15 @@ static void combine_flags(unsigned *dstflags, unsigned srcflags) *dstflags |= srcflags & IR3_REG_IMMED; *dstflags |= srcflags & IR3_REG_RELATIV; *dstflags |= srcflags & IR3_REG_ARRAY; -} -/* the "plain" MAD's (ie. 
the ones that don't shift first src prior to - * multiply) can swap their first two srcs if src[0] is !CONST and - * src[1] is CONST: - */ -static bool is_valid_mad(struct ir3_instruction *instr) -{ - return (instr->category == 3) && is_mad(instr->opc); + /* if src of the src is boolean we can drop the (abs) since we know + * the source value is already a positive integer. This cleans + * up the absnegs that get inserted when converting between nir and + * native boolean (see ir3_b2n/n2b) + */ + struct ir3_instruction *srcsrc = ssa(src->regs[1]); + if (srcsrc && is_bool(srcsrc)) + *dstflags &= ~IR3_REG_SABS; } /** @@ -226,12 +241,18 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) { struct ir3_instruction *src = ssa(reg); + /* don't propagate copies into a PHI, since we don't know if the + * src block executed: + */ + if (instr->opc == OPC_META_PHI) + return; + if (is_eligible_mov(src, true)) { /* simple case, no immed/const/relativ, only mov's w/ ssa src: */ struct ir3_register *src_reg = src->regs[1]; unsigned new_flags = reg->flags; - combine_flags(&new_flags, src_reg->flags); + combine_flags(&new_flags, src); if (valid_flags(instr, n, new_flags)) { if (new_flags & IR3_REG_ARRAY) { @@ -252,13 +273,17 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) struct ir3_register *src_reg = src->regs[1]; unsigned new_flags = reg->flags; - combine_flags(&new_flags, src_reg->flags); + combine_flags(&new_flags, src); if (!valid_flags(instr, n, new_flags)) { /* special case for "normal" mad instructions, we can * try swapping the first two args if that fits better. + * + * the "plain" MAD's (ie. 
the ones that don't shift first + * src prior to multiply) can swap their first two srcs if + * src[0] is !CONST and src[1] is CONST: */ - if ((n == 1) && is_valid_mad(instr) && + if ((n == 1) && is_mad(instr->opc) && !(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) && valid_flags(instr, 0, new_flags)) { /* swap src[0] and src[1]: */ @@ -292,7 +317,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) * just somehow don't work out. This restriction may only * apply if the first src is also CONST. */ - if ((instr->category == 3) && (n == 2) && + if ((opc_cat(instr->opc) == 3) && (n == 2) && (src_reg->flags & IR3_REG_RELATIV) && (src_reg->array.offset == 0)) return; @@ -328,10 +353,9 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) if (src_reg->flags & IR3_REG_IMMED) { int32_t iim_val = src_reg->iim_val; - debug_assert((instr->category == 1) || - (instr->category == 6) || - ((instr->category == 2) && - ir3_cat2_int(instr->opc))); + debug_assert((opc_cat(instr->opc) == 1) || + (opc_cat(instr->opc) == 6) || + ir3_cat2_int(instr->opc)); if (new_flags & IR3_REG_SABS) iim_val = abs(iim_val); @@ -343,7 +367,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) iim_val = ~iim_val; /* other than category 1 (mov) we can only encode up to 10 bits: */ - if ((instr->category == 1) || !(iim_val & ~0x3ff)) { + if ((instr->opc == OPC_MOV) || !(iim_val & ~0x3ff)) { new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT); src_reg = ir3_reg_clone(instr->block->shader, src_reg); src_reg->flags = new_flags; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c index 6d294f1a48c..c3f6de965ce 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c @@ -74,8 +74,7 @@ int ir3_delayslots(struct ir3_instruction *assigner, if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) || 
is_mem(consumer)) { return 6; - } else if ((consumer->category == 3) && - (is_mad(consumer->opc) || is_madsh(consumer->opc)) && + } else if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) && (n == 3)) { /* special case, 3rd src to cat3 not required on first cycle */ return 1; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c index ca28aefd502..cd59080b0f1 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_group.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c @@ -63,14 +63,13 @@ static void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr) debug_assert(instr->regs_count == 1); - in = ir3_instr_create(instr->block, -1, OPC_META_INPUT); + in = ir3_instr_create(instr->block, OPC_META_INPUT); in->inout.block = instr->block; ir3_reg_create(in, instr->regs[0]->num, 0); /* create src reg for meta:in and fixup to now be a mov: */ ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = in; - instr->category = 1; - instr->opc = 0; + instr->opc = OPC_MOV; instr->cat1.src_type = TYPE_F32; instr->cat1.dst_type = TYPE_F32; @@ -117,7 +116,7 @@ restart: conflicts(instr->cp.right, right); /* RA can't yet deal very well w/ group'd phi's: */ - if (is_meta(instr) && (instr->opc == OPC_META_PHI)) + if (instr->opc == OPC_META_PHI) conflict = true; /* we also can't have an instr twice in the group: */ @@ -168,7 +167,7 @@ instr_find_neighbors(struct ir3_instruction *instr) if (ir3_instr_check_mark(instr)) return; - if (is_meta(instr) && (instr->opc == OPC_META_FI)) + if (instr->opc == OPC_META_FI) group_n(&instr_ops, instr, instr->regs_count - 1); foreach_ssa_src(src, instr) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c index e94293f6d6b..77cd0e622f0 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c @@ -146,7 +146,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block 
*block) * clever if we were aware of this during scheduling, but * this should be a pretty rare case: */ - if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) { + if ((n->flags & IR3_INSTR_SS) && (opc_cat(n->opc) >= 5)) { struct ir3_instruction *nop; nop = ir3_NOP(block); nop->flags |= IR3_INSTR_SS; @@ -154,7 +154,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) } /* need to be able to set (ss) on first instruction: */ - if (list_empty(&block->instr_list) && (n->category >= 5)) + if (list_empty(&block->instr_list) && (opc_cat(n->opc) >= 5)) ir3_NOP(block); if (is_nop(n) && !list_empty(&block->instr_list)) { @@ -209,7 +209,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) struct ir3_instruction *baryf; /* (ss)bary.f (ei)r63.x, 0, r0.x */ - baryf = ir3_instr_create(block, 2, OPC_BARY_F); + baryf = ir3_instr_create(block, OPC_BARY_F); baryf->flags |= IR3_INSTR_SS; ir3_reg_create(baryf, regid(63, 0), 0); ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_print.c b/src/gallium/drivers/freedreno/ir3/ir3_print.c index ba0c4a57aa3..8aebf21a1be 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_print.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_print.c @@ -35,9 +35,12 @@ static void print_instr_name(struct ir3_instruction *instr) { + if (!instr) + return; #ifdef DEBUG printf("%04u:", instr->serialno); #endif + printf("%04u:", instr->name); printf("%03u: ", instr->depth); if (instr->flags & IR3_INSTR_SY) @@ -61,7 +64,7 @@ static void print_instr_name(struct ir3_instruction *instr) } break; } - } else if (instr->category == 1) { + } else if (instr->opc == OPC_MOV) { static const char *type[] = { [TYPE_F16] = "f16", [TYPE_F32] = "f32", @@ -146,16 +149,6 @@ tab(int lvl) printf("\t"); } -static uint32_t -block_id(struct ir3_block *block) -{ -#ifdef DEBUG - return block->serialno; -#else - return (uint32_t)(unsigned long)block; -#endif -} - static void print_instr(struct 
ir3_instruction *instr, int lvl) { @@ -191,10 +184,8 @@ print_instr(struct ir3_instruction *instr, int lvl) printf("]"); } - if (is_meta(instr)) { - if (instr->opc == OPC_META_FO) { - printf(", off=%d", instr->fo.off); - } + if (instr->opc == OPC_META_FO) { + printf(", off=%d", instr->fo.off); } if (is_flow(instr) && instr->cat0.target) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index bcad96e8a30..ed3030d722a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -31,6 +31,8 @@ #include "util/ralloc.h" #include "util/bitset.h" +#include "freedreno_util.h" + #include "ir3.h" #include "ir3_compiler.h" @@ -342,7 +344,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, return id->defn; } - if (is_meta(instr) && (instr->opc == OPC_META_FI)) { + if (instr->opc == OPC_META_FI) { /* What about the case where collect is subset of array, we * need to find the distance between where actual array starts * and fanin.. that probably doesn't happen currently. 
@@ -436,7 +438,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, } } - if (is_meta(d) && (d->opc == OPC_META_PHI)) { + if (d->opc == OPC_META_PHI) { /* we have already inserted parallel-copies into * the phi, so we don't need to chase definers */ @@ -456,7 +458,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, d = dd; } - if (is_meta(d) && (d->opc == OPC_META_FO)) { + if (d->opc == OPC_META_FO) { struct ir3_instruction *dd; int dsz, doff; @@ -810,6 +812,22 @@ ra_compute_livein_liveout(struct ir3_ra_ctx *ctx) } static void +print_bitset(const char *name, BITSET_WORD *bs, unsigned cnt) +{ + bool first = true; + debug_printf(" %s:", name); + for (unsigned i = 0; i < cnt; i++) { + if (BITSET_TEST(bs, i)) { + if (!first) + debug_printf(","); + debug_printf(" %04u", i); + first = false; + } + } + debug_printf("\n"); +} + +static void ra_add_interference(struct ir3_ra_ctx *ctx) { struct ir3 *ir = ctx->ir; @@ -831,12 +849,24 @@ ra_add_interference(struct ir3_ra_ctx *ctx) /* update per-block livein/liveout: */ while (ra_compute_livein_liveout(ctx)) {} + if (fd_mesa_debug & FD_DBG_OPTMSGS) { + debug_printf("AFTER LIVEIN/OUT:\n"); + ir3_print(ir); + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + struct ir3_ra_block_data *bd = block->data; + debug_printf("block%u:\n", block_id(block)); + print_bitset("def", bd->def, ctx->alloc_count); + print_bitset("use", bd->use, ctx->alloc_count); + print_bitset("l/i", bd->livein, ctx->alloc_count); + print_bitset("l/o", bd->liveout, ctx->alloc_count); + } + } + /* extend start/end ranges based on livein/liveout info from cfg: */ - unsigned bitset_words = BITSET_WORDS(ctx->alloc_count); list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { struct ir3_ra_block_data *bd = block->data; - for (unsigned i = 0; i < bitset_words; i++) { + for (unsigned i = 0; i < ctx->alloc_count; i++) { if (BITSET_TEST(bd->livein, i)) { ctx->def[i] = MIN2(ctx->def[i], 
block->start_ip); ctx->use[i] = MAX2(ctx->use[i], block->start_ip); @@ -869,7 +899,7 @@ ra_add_interference(struct ir3_ra_ctx *ctx) /* some instructions need fix-up if dst register is half precision: */ static void fixup_half_instr_dst(struct ir3_instruction *instr) { - switch (instr->category) { + switch (opc_cat(instr->opc)) { case 1: /* move instructions */ instr->cat1.dst_type = half_type(instr->cat1.dst_type); break; @@ -910,10 +940,12 @@ static void fixup_half_instr_dst(struct ir3_instruction *instr) /* some instructions need fix-up if src register is half precision: */ static void fixup_half_instr_src(struct ir3_instruction *instr) { - switch (instr->category) { - case 1: /* move instructions */ + switch (instr->opc) { + case OPC_MOV: instr->cat1.src_type = half_type(instr->cat1.src_type); break; + default: + break; } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c index 8f640febc5d..b56da304f92 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c @@ -511,8 +511,7 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) * occupied), and move remaining to depth sorted list: */ list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) { - if (is_meta(instr) && ((instr->opc == OPC_META_INPUT) || - (instr->opc == OPC_META_PHI))) { + if ((instr->opc == OPC_META_INPUT) || (instr->opc == OPC_META_PHI)) { schedule(ctx, instr); } else { ir3_insert_by_depth(instr, &ctx->depth_list); @@ -627,14 +626,29 @@ static void sched_insert_parallel_copies(struct ir3_block *block) { list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - if (is_meta(instr) && (instr->opc == OPC_META_PHI)) { - struct ir3_register *reg; + if (instr->opc == OPC_META_PHI) { + struct ir3_register *reg, *reg2; foreach_src(reg, instr) { struct ir3_instruction *src = reg->instr; - struct ir3_instruction *mov = - 
ir3_MOV(src->block, src, TYPE_U32); - mov->regs[0]->flags |= IR3_REG_PHI_SRC; - mov->regs[0]->instr = instr; + struct ir3_instruction *mov = NULL; + + /* after CP we could end up w/ duplicate phi srcs: */ + foreach_src(reg2, instr) { + if (reg == reg2) + break; + /* reg2 is before reg1 so already an inserted mov: */ + else if (reg2->instr->regs[1]->instr == src) { + mov = reg2->instr; + break; + } + } + + if (!mov) { + mov = ir3_MOV(src->block, src, TYPE_U32); + mov->regs[0]->flags |= IR3_REG_PHI_SRC; + mov->regs[0]->instr = instr; + } + reg->instr = mov; } } diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index f4aa310ecdc..68e32e51c34 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -269,6 +269,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 7812c826250..142d6f1fa21 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -498,6 +498,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 2529b546564..6a5f906adc6 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -319,6 +319,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; } /* should only get here on unhandled cases */ diff 
--git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 66e7b2e8243..fea388685fa 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -2824,7 +2824,7 @@ FlatteningPass::visit(BasicBlock *bb) !isSurfaceOp(insn->op) && // not confirmed insn->op != OP_LINTERP && // probably just nve4 insn->op != OP_PINTERP && // probably just nve4 - ((insn->op != OP_LOAD && insn->op != OP_STORE) || + ((insn->op != OP_LOAD && insn->op != OP_STORE && insn->op != OP_ATOM) || (typeSizeof(insn->dType) <= 4 && !insn->src(0).isIndirect(0))) && !insn->isNop()) { insn->join = 1; diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index b105c6aeb80..db7c2d15fb1 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -192,6 +192,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index ba5e5003b69..20fb61b51f4 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -245,6 +245,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index ec2340ee0c3..c41912a6037 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -251,6 +251,7 @@ 
nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 1c3bb64f0e4..b3a7f049e10 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -214,6 +214,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: case PIPE_CAP_QUERY_BUFFER_OBJECT: case PIPE_CAP_QUERY_MEMORY_INFO: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; /* SWTCL-only features. */ diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index f4b669000dc..6f171487f92 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -83,29 +83,26 @@ writable images will consume TEX slots, VTX slots too because of linear indexing */ -struct r600_resource* r600_compute_buffer_alloc_vram( - struct r600_screen *screen, - unsigned size) +struct r600_resource *r600_compute_buffer_alloc_vram(struct r600_screen *screen, + unsigned size) { - struct pipe_resource * buffer = NULL; + struct pipe_resource *buffer = NULL; assert(size); - buffer = pipe_buffer_create( - (struct pipe_screen*) screen, - PIPE_BIND_CUSTOM, - PIPE_USAGE_IMMUTABLE, - size); + buffer = pipe_buffer_create((struct pipe_screen*) screen, + PIPE_BIND_CUSTOM, + PIPE_USAGE_IMMUTABLE, + size); return (struct r600_resource *)buffer; } -static void evergreen_set_rat( - struct r600_pipe_compute *pipe, - unsigned id, - struct r600_resource* bo, - int start, - int size) +static void evergreen_set_rat(struct r600_pipe_compute *pipe, + unsigned id, + struct r600_resource *bo, + int start, + int size) { struct pipe_surface rat_templ; struct r600_surface *surf = NULL; @@ -145,11 +142,10 
@@ static void evergreen_set_rat( evergreen_init_color_surface_rat(rctx, surf); } -static void evergreen_cs_set_vertex_buffer( - struct r600_context * rctx, - unsigned vb_index, - unsigned offset, - struct pipe_resource * buffer) +static void evergreen_cs_set_vertex_buffer(struct r600_context *rctx, + unsigned vb_index, + unsigned offset, + struct pipe_resource *buffer) { struct r600_vertexbuf_state *state = &rctx->cs_vertex_buffer_state; struct pipe_vertex_buffer *vb = &state->vb[vb_index]; @@ -166,12 +162,11 @@ static void evergreen_cs_set_vertex_buffer( r600_mark_atom_dirty(rctx, &state->atom); } -static void evergreen_cs_set_constant_buffer( - struct r600_context * rctx, - unsigned cb_index, - unsigned offset, - unsigned size, - struct pipe_resource * buffer) +static void evergreen_cs_set_constant_buffer(struct r600_context *rctx, + unsigned cb_index, + unsigned offset, + unsigned size, + struct pipe_resource *buffer) { struct pipe_constant_buffer cb; cb.buffer_size = size; @@ -182,16 +177,6 @@ static void evergreen_cs_set_constant_buffer( rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_COMPUTE, cb_index, &cb); } -static const struct u_resource_vtbl r600_global_buffer_vtbl = -{ - u_default_resource_get_handle, /* get_handle */ - r600_compute_global_buffer_destroy, /* resource_destroy */ - r600_compute_global_transfer_map, /* transfer_map */ - r600_compute_global_transfer_flush_region,/* transfer_flush_region */ - r600_compute_global_transfer_unmap, /* transfer_unmap */ - r600_compute_global_transfer_inline_write /* transfer_inline_write */ -}; - /* We need to define these R600 registers here, because we can't include * evergreend.h and r600d.h. 
*/ @@ -256,33 +241,32 @@ static void r600_destroy_shader(struct r600_bytecode *bc) FREE(bc->bytecode); } -void *evergreen_create_compute_state( - struct pipe_context *ctx_, - const const struct pipe_compute_state *cso) +static void *evergreen_create_compute_state(struct pipe_context *ctx, + const const struct pipe_compute_state *cso) { - struct r600_context *ctx = (struct r600_context *)ctx_; + struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute); #ifdef HAVE_OPENCL - const struct pipe_llvm_program_header * header; + const struct pipe_llvm_program_header *header; const char *code; void *p; boolean use_kill; - COMPUTE_DBG(ctx->screen, "*** evergreen_create_compute_state\n"); + COMPUTE_DBG(rctx->screen, "*** evergreen_create_compute_state\n"); header = cso->prog; code = cso->prog + sizeof(struct pipe_llvm_program_header); radeon_shader_binary_init(&shader->binary); radeon_elf_read(code, header->num_bytes, &shader->binary); r600_create_shader(&shader->bc, &shader->binary, &use_kill); - shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen, + shader->code_bo = r600_compute_buffer_alloc_vram(rctx->screen, shader->bc.ndw * 4); - p = r600_buffer_map_sync_with_rings(&ctx->b, shader->code_bo, PIPE_TRANSFER_WRITE); + p = r600_buffer_map_sync_with_rings(&rctx->b, shader->code_bo, PIPE_TRANSFER_WRITE); memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4); - ctx->b.ws->buffer_unmap(shader->code_bo->buf); + rctx->b.ws->buffer_unmap(shader->code_bo->buf); #endif - shader->ctx = ctx; + shader->ctx = rctx; shader->local_size = cso->req_local_mem; shader->private_size = cso->req_private_mem; shader->input_size = cso->req_input_mem; @@ -290,12 +274,13 @@ void *evergreen_create_compute_state( return shader; } -void evergreen_delete_compute_state(struct pipe_context *ctx_, void* state) +static void evergreen_delete_compute_state(struct pipe_context *ctx, void *state) { - struct r600_context *ctx = (struct 
r600_context *)ctx_; - COMPUTE_DBG(ctx->screen, "*** evergreen_delete_compute_state\n"); + struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_compute *shader = state; + COMPUTE_DBG(rctx->screen, "*** evergreen_delete_compute_state\n"); + if (!shader) return; @@ -307,13 +292,13 @@ void evergreen_delete_compute_state(struct pipe_context *ctx_, void* state) FREE(shader); } -static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state) +static void evergreen_bind_compute_state(struct pipe_context *ctx, void *state) { - struct r600_context *ctx = (struct r600_context *)ctx_; + struct r600_context *rctx = (struct r600_context *)ctx; - COMPUTE_DBG(ctx->screen, "*** evergreen_bind_compute_state\n"); + COMPUTE_DBG(rctx->screen, "*** evergreen_bind_compute_state\n"); - ctx->cs_shader_state.shader = (struct r600_pipe_compute *)state; + rctx->cs_shader_state.shader = (struct r600_pipe_compute *)state; } /* The kernel parameters are stored a vtx buffer (ID=0), besides the explicit @@ -327,23 +312,20 @@ static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state) * (x,y,z) * DWORDS 9+ : Kernel parameters */ -void evergreen_compute_upload_input( - struct pipe_context *ctx_, - const uint *block_layout, - const uint *grid_layout, - const void *input) +static void evergreen_compute_upload_input(struct pipe_context *ctx, + const struct pipe_grid_info *info) { - struct r600_context *ctx = (struct r600_context *)ctx_; - struct r600_pipe_compute *shader = ctx->cs_shader_state.shader; + struct r600_context *rctx = (struct r600_context *)ctx; + struct r600_pipe_compute *shader = rctx->cs_shader_state.shader; unsigned i; /* We need to reserve 9 dwords (36 bytes) for implicit kernel * parameters. 
*/ unsigned input_size = shader->input_size + 36; - uint32_t * num_work_groups_start; - uint32_t * global_size_start; - uint32_t * local_size_start; - uint32_t * kernel_parameters_start; + uint32_t *num_work_groups_start; + uint32_t *global_size_start; + uint32_t *local_size_start; + uint32_t *kernel_parameters_start; struct pipe_box box; struct pipe_transfer *transfer = NULL; @@ -354,12 +336,12 @@ void evergreen_compute_upload_input( if (!shader->kernel_param) { /* Add space for the grid dimensions */ shader->kernel_param = (struct r600_resource *) - pipe_buffer_create(ctx_->screen, PIPE_BIND_CUSTOM, + pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, input_size); } u_box_1d(0, input_size, &box); - num_work_groups_start = ctx_->transfer_map(ctx_, + num_work_groups_start = ctx->transfer_map(ctx, (struct pipe_resource*)shader->kernel_param, 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE, &box, &transfer); @@ -368,34 +350,33 @@ void evergreen_compute_upload_input( kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4); /* Copy the work group size */ - memcpy(num_work_groups_start, grid_layout, 3 * sizeof(uint)); + memcpy(num_work_groups_start, info->grid, 3 * sizeof(uint)); /* Copy the global size */ for (i = 0; i < 3; i++) { - global_size_start[i] = grid_layout[i] * block_layout[i]; + global_size_start[i] = info->grid[i] * info->block[i]; } /* Copy the local dimensions */ - memcpy(local_size_start, block_layout, 3 * sizeof(uint)); + memcpy(local_size_start, info->block, 3 * sizeof(uint)); /* Copy the kernel inputs */ - memcpy(kernel_parameters_start, input, shader->input_size); + memcpy(kernel_parameters_start, info->input, shader->input_size); for (i = 0; i < (input_size / 4); i++) { - COMPUTE_DBG(ctx->screen, "input %i : %u\n", i, + COMPUTE_DBG(rctx->screen, "input %i : %u\n", i, ((unsigned*)num_work_groups_start)[i]); } - ctx_->transfer_unmap(ctx_, transfer); + ctx->transfer_unmap(ctx, transfer); /* ID=0 is reserved for 
the parameters */ - evergreen_cs_set_constant_buffer(ctx, 0, 0, input_size, + evergreen_cs_set_constant_buffer(rctx, 0, 0, input_size, (struct pipe_resource*)shader->kernel_param); } -static void evergreen_emit_direct_dispatch( - struct r600_context *rctx, - const uint *block_layout, const uint *grid_layout) +static void evergreen_emit_dispatch(struct r600_context *rctx, + const struct pipe_grid_info *info) { int i; struct radeon_winsys_cs *cs = rctx->b.gfx.cs; @@ -411,15 +392,15 @@ static void evergreen_emit_direct_dispatch( /* Calculate group_size/grid_size */ for (i = 0; i < 3; i++) { - group_size *= block_layout[i]; + group_size *= info->block[i]; } for (i = 0; i < 3; i++) { - grid_size *= grid_layout[i]; + grid_size *= info->grid[i]; } /* num_waves = ceil((tg_size.x * tg_size.y, tg_size.z) / (16 * num_pipes)) */ - num_waves = (block_layout[0] * block_layout[1] * block_layout[2] + + num_waves = (info->block[0] * info->block[1] * info->block[2] + wave_divisor - 1) / wave_divisor; COMPUTE_DBG(rctx->screen, "Using %u pipes, " @@ -438,9 +419,9 @@ static void evergreen_emit_direct_dispatch( group_size); radeon_compute_set_context_reg_seq(cs, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, 3); - radeon_emit(cs, block_layout[0]); /* R_0286EC_SPI_COMPUTE_NUM_THREAD_X */ - radeon_emit(cs, block_layout[1]); /* R_0286F0_SPI_COMPUTE_NUM_THREAD_Y */ - radeon_emit(cs, block_layout[2]); /* R_0286F4_SPI_COMPUTE_NUM_THREAD_Z */ + radeon_emit(cs, info->block[0]); /* R_0286EC_SPI_COMPUTE_NUM_THREAD_X */ + radeon_emit(cs, info->block[1]); /* R_0286F0_SPI_COMPUTE_NUM_THREAD_Y */ + radeon_emit(cs, info->block[2]); /* R_0286F4_SPI_COMPUTE_NUM_THREAD_Z */ if (rctx->b.chip_class < CAYMAN) { assert(lds_size <= 8192); @@ -455,22 +436,22 @@ static void evergreen_emit_direct_dispatch( /* Dispatch packet */ radeon_emit(cs, PKT3C(PKT3_DISPATCH_DIRECT, 3, 0)); - radeon_emit(cs, grid_layout[0]); - radeon_emit(cs, grid_layout[1]); - radeon_emit(cs, grid_layout[2]); + radeon_emit(cs, info->grid[0]); + 
radeon_emit(cs, info->grid[1]); + radeon_emit(cs, info->grid[2]); /* VGT_DISPATCH_INITIATOR = COMPUTE_SHADER_EN */ radeon_emit(cs, 1); } -static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, - const uint *grid_layout) +static void compute_emit_cs(struct r600_context *rctx, + const struct pipe_grid_info *info) { - struct radeon_winsys_cs *cs = ctx->b.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; unsigned i; /* make sure that the gfx ring is only one active */ - if (ctx->b.dma.cs && ctx->b.dma.cs->cdw) { - ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + if (rctx->b.dma.cs && rctx->b.dma.cs->cdw) { + rctx->b.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL); } /* Initialize all the compute-related registers. @@ -478,20 +459,20 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, * See evergreen_init_atom_start_compute_cs() in this file for the list * of registers initialized by the start_compute_cs_cmd atom. */ - r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd); + r600_emit_command_buffer(cs, &rctx->start_compute_cs_cmd); /* emit config state */ - if (ctx->b.chip_class == EVERGREEN) - r600_emit_atom(ctx, &ctx->config_state.atom); + if (rctx->b.chip_class == EVERGREEN) + r600_emit_atom(rctx, &rctx->config_state.atom); - ctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; - r600_flush_emit(ctx); + rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; + r600_flush_emit(rctx); /* Emit colorbuffers. 
*/ /* XXX support more than 8 colorbuffers (the offsets are not a multiple of 0x3C for CB8-11) */ - for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) { - struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i]; - unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx, + for (i = 0; i < 8 && i < rctx->framebuffer.state.nr_cbufs; i++) { + struct r600_surface *cb = (struct r600_surface*)rctx->framebuffer.state.cbufs[i]; + unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)cb->base.texture, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER); @@ -520,51 +501,51 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, /* Set CB_TARGET_MASK XXX: Use cb_misc_state */ radeon_compute_set_context_reg(cs, R_028238_CB_TARGET_MASK, - ctx->compute_cb_target_mask); + rctx->compute_cb_target_mask); /* Emit vertex buffer state */ - ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask); - r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom); + rctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(rctx->cs_vertex_buffer_state.dirty_mask); + r600_emit_atom(rctx, &rctx->cs_vertex_buffer_state.atom); /* Emit constant buffer state */ - r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom); + r600_emit_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE].atom); /* Emit sampler state */ - r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].states.atom); + r600_emit_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].states.atom); /* Emit sampler view (texture resource) state */ - r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].views.atom); + r600_emit_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views.atom); /* Emit compute shader state */ - r600_emit_atom(ctx, &ctx->cs_shader_state.atom); + r600_emit_atom(rctx, &rctx->cs_shader_state.atom); /* Emit dispatch state and dispatch packet */ - 
evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout); + evergreen_emit_dispatch(rctx, info); /* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff */ - ctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE | + rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE | R600_CONTEXT_INV_VERTEX_CACHE | R600_CONTEXT_INV_TEX_CACHE; - r600_flush_emit(ctx); - ctx->b.flags = 0; + r600_flush_emit(rctx); + rctx->b.flags = 0; - if (ctx->b.chip_class >= CAYMAN) { - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4); + if (rctx->b.chip_class >= CAYMAN) { + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4)); /* DEALLOC_STATE prevents the GPU from hanging when a * SURFACE_SYNC packet is emitted some time after a DISPATCH_DIRECT * with any of the CB*_DEST_BASE_ENA or DB_DEST_BASE_ENA bits set. */ - cs->buf[cs->cdw++] = PKT3C(PKT3_DEALLOC_STATE, 0, 0); - cs->buf[cs->cdw++] = 0; + radeon_emit(cs, PKT3C(PKT3_DEALLOC_STATE, 0, 0)); + radeon_emit(cs, 0); } #if 0 - COMPUTE_DBG(ctx->screen, "cdw: %i\n", cs->cdw); + COMPUTE_DBG(rctx->screen, "cdw: %i\n", cs->cdw); for (i = 0; i < cs->cdw; i++) { - COMPUTE_DBG(ctx->screen, "%4i : 0x%08X\n", i, cs->buf[i]); + COMPUTE_DBG(rctx->screen, "%4i : 0x%08X\n", i, cs->buf[i]); } #endif @@ -574,9 +555,8 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, /** * Emit function for r600_cs_shader_state atom */ -void evergreen_emit_cs_shader( - struct r600_context *rctx, - struct r600_atom *atom) +void evergreen_emit_cs_shader(struct r600_context *rctx, + struct r600_atom *atom) { struct r600_cs_shader_state *state = (struct r600_cs_shader_state*)atom; @@ -604,35 +584,35 @@ void evergreen_emit_cs_shader( RADEON_PRIO_USER_SHADER)); } -static void evergreen_launch_grid( - struct pipe_context *ctx_, const struct pipe_grid_info *info) +static void 
evergreen_launch_grid(struct pipe_context *ctx, + const struct pipe_grid_info *info) { - struct r600_context *ctx = (struct r600_context *)ctx_; + struct r600_context *rctx = (struct r600_context *)ctx; #ifdef HAVE_OPENCL - struct r600_pipe_compute *shader = ctx->cs_shader_state.shader; + struct r600_pipe_compute *shader = rctx->cs_shader_state.shader; boolean use_kill; - ctx->cs_shader_state.pc = info->pc; + rctx->cs_shader_state.pc = info->pc; /* Get the config information for this kernel. */ r600_shader_binary_read_config(&shader->binary, &shader->bc, info->pc, &use_kill); #endif - COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", info->pc); + COMPUTE_DBG(rctx->screen, "*** evergreen_launch_grid: pc = %u\n", info->pc); - evergreen_compute_upload_input(ctx_, info->block, info->grid, info->input); - compute_emit_cs(ctx, info->block, info->grid); + evergreen_compute_upload_input(ctx, info); + compute_emit_cs(rctx, info); } -static void evergreen_set_compute_resources(struct pipe_context * ctx_, - unsigned start, unsigned count, - struct pipe_surface ** surfaces) +static void evergreen_set_compute_resources(struct pipe_context *ctx, + unsigned start, unsigned count, + struct pipe_surface **surfaces) { - struct r600_context *ctx = (struct r600_context *)ctx_; + struct r600_context *rctx = (struct r600_context *)ctx; struct r600_surface **resources = (struct r600_surface **)surfaces; - COMPUTE_DBG(ctx->screen, "*** evergreen_set_compute_resources: start = %u count = %u\n", + COMPUTE_DBG(rctx->screen, "*** evergreen_set_compute_resources: start = %u count = %u\n", start, count); for (unsigned i = 0; i < count; i++) { @@ -646,31 +626,31 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_, if (resources[i]->base.writable) { assert(i+1 < 12); - evergreen_set_rat(ctx->cs_shader_state.shader, i+1, + evergreen_set_rat(rctx->cs_shader_state.shader, i+1, (struct r600_resource *)resources[i]->base.texture, buffer->chunk->start_in_dw*4, 
resources[i]->base.texture->width0); } - evergreen_cs_set_vertex_buffer(ctx, vtx_id, + evergreen_cs_set_vertex_buffer(rctx, vtx_id, buffer->chunk->start_in_dw * 4, resources[i]->base.texture); } } } -static void evergreen_set_global_binding( - struct pipe_context *ctx_, unsigned first, unsigned n, - struct pipe_resource **resources, - uint32_t **handles) +static void evergreen_set_global_binding(struct pipe_context *ctx, + unsigned first, unsigned n, + struct pipe_resource **resources, + uint32_t **handles) { - struct r600_context *ctx = (struct r600_context *)ctx_; - struct compute_memory_pool *pool = ctx->screen->global_pool; + struct r600_context *rctx = (struct r600_context *)ctx; + struct compute_memory_pool *pool = rctx->screen->global_pool; struct r600_resource_global **buffers = (struct r600_resource_global **)resources; unsigned i; - COMPUTE_DBG(ctx->screen, "*** evergreen_set_global_binding first = %u n = %u\n", + COMPUTE_DBG(rctx->screen, "*** evergreen_set_global_binding first = %u n = %u\n", first, n); if (!resources) { @@ -687,7 +667,7 @@ static void evergreen_set_global_binding( buffers[i]->chunk->status |= ITEM_FOR_PROMOTING; } - if (compute_memory_finalize_pending(pool, ctx_) == -1) { + if (compute_memory_finalize_pending(pool, ctx) == -1) { /* XXX: Unset */ return; } @@ -705,8 +685,8 @@ static void evergreen_set_global_binding( *(handles[i]) = util_cpu_to_le32(handle); } - evergreen_set_rat(ctx->cs_shader_state.shader, 0, pool->bo, 0, pool->size_in_dw * 4); - evergreen_cs_set_vertex_buffer(ctx, 1, 0, + evergreen_set_rat(rctx->cs_shader_state.shader, 0, pool->bo, 0, pool->size_in_dw * 4); + evergreen_cs_set_vertex_buffer(rctx, 1, 0, (struct pipe_resource*)pool->bo); } @@ -721,9 +701,9 @@ static void evergreen_set_global_binding( * functions evergreen_init_atom_start_cs or cayman_init_atom_start_cs depending * on the GPU family. 
*/ -void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) +void evergreen_init_atom_start_compute_cs(struct r600_context *rctx) { - struct r600_command_buffer *cb = &ctx->start_compute_cs_cmd; + struct r600_command_buffer *cb = &rctx->start_compute_cs_cmd; int num_threads; int num_stack_entries; @@ -742,7 +722,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0)); r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4)); - switch (ctx->b.family) { + switch (rctx->b.family) { case CHIP_CEDAR: default: num_threads = 128; @@ -788,18 +768,18 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) } /* Config Registers */ - if (ctx->b.chip_class < CAYMAN) - evergreen_init_common_regs(ctx, cb, ctx->b.chip_class, ctx->b.family, - ctx->screen->b.info.drm_minor); + if (rctx->b.chip_class < CAYMAN) + evergreen_init_common_regs(rctx, cb, rctx->b.chip_class, rctx->b.family, + rctx->screen->b.info.drm_minor); else - cayman_init_common_regs(cb, ctx->b.chip_class, ctx->b.family, - ctx->screen->b.info.drm_minor); + cayman_init_common_regs(cb, rctx->b.chip_class, rctx->b.family, + rctx->screen->b.info.drm_minor); /* The primitive type always needs to be POINTLIST for compute. */ r600_store_config_reg(cb, R_008958_VGT_PRIMITIVE_TYPE, V_008958_DI_PT_POINTLIST); - if (ctx->b.chip_class < CAYMAN) { + if (rctx->b.chip_class < CAYMAN) { /* These registers control which simds can be used by each stage. * The default for these registers is 0xffffffff, which means @@ -849,7 +829,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) * allocate the appropriate amount of LDS dwords using the * CM_R_0288E8_SQ_LDS_ALLOC register. 
*/ - if (ctx->b.chip_class < CAYMAN) { + if (rctx->b.chip_class < CAYMAN) { r600_store_config_reg(cb, R_008E2C_SQ_LDS_RESOURCE_MGMT, S_008E2C_NUM_PS_LDS(0x0000) | S_008E2C_NUM_LS_LDS(8192)); } else { @@ -860,7 +840,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) /* Context Registers */ - if (ctx->b.chip_class < CAYMAN) { + if (rctx->b.chip_class < CAYMAN) { /* workaround for hw issues with dyn gpr - must set all limits * to 240 instead of 0, 0x1e == 240 / 8 */ @@ -902,86 +882,26 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (160 * 4), 0x1000FFF); } -void evergreen_init_compute_state_functions(struct r600_context *ctx) -{ - ctx->b.b.create_compute_state = evergreen_create_compute_state; - ctx->b.b.delete_compute_state = evergreen_delete_compute_state; - ctx->b.b.bind_compute_state = evergreen_bind_compute_state; -// ctx->context.create_sampler_view = evergreen_compute_create_sampler_view; - ctx->b.b.set_compute_resources = evergreen_set_compute_resources; - ctx->b.b.set_global_binding = evergreen_set_global_binding; - ctx->b.b.launch_grid = evergreen_launch_grid; - -} - -struct pipe_resource *r600_compute_global_buffer_create( - struct pipe_screen *screen, - const struct pipe_resource *templ) +void evergreen_init_compute_state_functions(struct r600_context *rctx) { - struct r600_resource_global* result = NULL; - struct r600_screen* rscreen = NULL; - int size_in_dw = 0; - - assert(templ->target == PIPE_BUFFER); - assert(templ->bind & PIPE_BIND_GLOBAL); - assert(templ->array_size == 1 || templ->array_size == 0); - assert(templ->depth0 == 1 || templ->depth0 == 0); - assert(templ->height0 == 1 || templ->height0 == 0); - - result = (struct r600_resource_global*) - CALLOC(sizeof(struct r600_resource_global), 1); - rscreen = (struct r600_screen*)screen; - - COMPUTE_DBG(rscreen, "*** r600_compute_global_buffer_create\n"); - COMPUTE_DBG(rscreen, "width = %u array_size = 
%u\n", templ->width0, - templ->array_size); - - result->base.b.vtbl = &r600_global_buffer_vtbl; - result->base.b.b = *templ; - result->base.b.b.screen = screen; - pipe_reference_init(&result->base.b.b.reference, 1); - - size_in_dw = (templ->width0+3) / 4; - - result->chunk = compute_memory_alloc(rscreen->global_pool, size_in_dw); - - if (result->chunk == NULL) - { - free(result); - return NULL; - } - - return &result->base.b.b; -} - -void r600_compute_global_buffer_destroy( - struct pipe_screen *screen, - struct pipe_resource *res) -{ - struct r600_resource_global* buffer = NULL; - struct r600_screen* rscreen = NULL; - - assert(res->target == PIPE_BUFFER); - assert(res->bind & PIPE_BIND_GLOBAL); - - buffer = (struct r600_resource_global*)res; - rscreen = (struct r600_screen*)screen; + rctx->b.b.create_compute_state = evergreen_create_compute_state; + rctx->b.b.delete_compute_state = evergreen_delete_compute_state; + rctx->b.b.bind_compute_state = evergreen_bind_compute_state; +// rctx->context.create_sampler_view = evergreen_compute_create_sampler_view; + rctx->b.b.set_compute_resources = evergreen_set_compute_resources; + rctx->b.b.set_global_binding = evergreen_set_global_binding; + rctx->b.b.launch_grid = evergreen_launch_grid; - compute_memory_free(rscreen->global_pool, buffer->chunk->id); - - buffer->chunk = NULL; - free(res); } -void *r600_compute_global_transfer_map( - struct pipe_context *ctx_, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box, - struct pipe_transfer **ptransfer) +static void *r600_compute_global_transfer_map(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer) { - struct r600_context *rctx = (struct r600_context*)ctx_; + struct r600_context *rctx = (struct r600_context*)ctx; struct compute_memory_pool *pool = rctx->screen->global_pool; struct r600_resource_global* buffer = (struct 
r600_resource_global*)resource; @@ -991,7 +911,7 @@ void *r600_compute_global_transfer_map( unsigned offset = box->x; if (is_item_in_pool(item)) { - compute_memory_demote_item(pool, item, ctx_); + compute_memory_demote_item(pool, item, ctx); } else { if (item->real_buffer == NULL) { @@ -1021,13 +941,12 @@ void *r600_compute_global_transfer_map( assert(box->z == 0); ///TODO: do it better, mapping is not possible if the pool is too big - return pipe_buffer_map_range(ctx_, dst, + return pipe_buffer_map_range(ctx, dst, offset, box->width, usage, ptransfer); } -void r600_compute_global_transfer_unmap( - struct pipe_context *ctx_, - struct pipe_transfer* transfer) +static void r600_compute_global_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer *transfer) { /* struct r600_resource_global are not real resources, they just map * to an offset within the compute memory pool. The function @@ -1042,23 +961,88 @@ void r600_compute_global_transfer_unmap( assert (!"This function should not be called"); } -void r600_compute_global_transfer_flush_region( - struct pipe_context *ctx_, - struct pipe_transfer *transfer, - const struct pipe_box *box) +static void r600_compute_global_transfer_flush_region(struct pipe_context *ctx, + struct pipe_transfer *transfer, + const struct pipe_box *box) { assert(0 && "TODO"); } -void r600_compute_global_transfer_inline_write( - struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box, - const void *data, - unsigned stride, - unsigned layer_stride) +static void r600_compute_global_transfer_inline_write(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) { assert(0 && "TODO"); } + +static void r600_compute_global_buffer_destroy(struct pipe_screen *screen, + struct pipe_resource *res) +{ + struct r600_resource_global* buffer = 
NULL; + struct r600_screen* rscreen = NULL; + + assert(res->target == PIPE_BUFFER); + assert(res->bind & PIPE_BIND_GLOBAL); + + buffer = (struct r600_resource_global*)res; + rscreen = (struct r600_screen*)screen; + + compute_memory_free(rscreen->global_pool, buffer->chunk->id); + + buffer->chunk = NULL; + free(res); +} + +static const struct u_resource_vtbl r600_global_buffer_vtbl = +{ + u_default_resource_get_handle, /* get_handle */ + r600_compute_global_buffer_destroy, /* resource_destroy */ + r600_compute_global_transfer_map, /* transfer_map */ + r600_compute_global_transfer_flush_region,/* transfer_flush_region */ + r600_compute_global_transfer_unmap, /* transfer_unmap */ + r600_compute_global_transfer_inline_write /* transfer_inline_write */ +}; + +struct pipe_resource *r600_compute_global_buffer_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + struct r600_resource_global* result = NULL; + struct r600_screen* rscreen = NULL; + int size_in_dw = 0; + + assert(templ->target == PIPE_BUFFER); + assert(templ->bind & PIPE_BIND_GLOBAL); + assert(templ->array_size == 1 || templ->array_size == 0); + assert(templ->depth0 == 1 || templ->depth0 == 0); + assert(templ->height0 == 1 || templ->height0 == 0); + + result = (struct r600_resource_global*) + CALLOC(sizeof(struct r600_resource_global), 1); + rscreen = (struct r600_screen*)screen; + + COMPUTE_DBG(rscreen, "*** r600_compute_global_buffer_create\n"); + COMPUTE_DBG(rscreen, "width = %u array_size = %u\n", templ->width0, + templ->array_size); + + result->base.b.vtbl = &r600_global_buffer_vtbl; + result->base.b.b = *templ; + result->base.b.b.screen = screen; + pipe_reference_init(&result->base.b.b.reference, 1); + + size_in_dw = (templ->width0+3) / 4; + + result->chunk = compute_memory_alloc(rscreen->global_pool, size_in_dw); + + if (result->chunk == NULL) + { + free(result); + return NULL; + } + + return &result->base.b.b; +} diff --git a/src/gallium/drivers/r600/evergreen_compute.h 
b/src/gallium/drivers/r600/evergreen_compute.h index e4d3a38e415..3c178870d91 100644 --- a/src/gallium/drivers/r600/evergreen_compute.h +++ b/src/gallium/drivers/r600/evergreen_compute.h @@ -38,26 +38,11 @@ struct r600_resource_global { struct compute_memory_item *chunk; }; -void *evergreen_create_compute_state(struct pipe_context *ctx, const struct pipe_compute_state *cso); -void evergreen_delete_compute_state(struct pipe_context *ctx, void *state); -void evergreen_compute_upload_input(struct pipe_context *context, const uint *block_layout, const uint *grid_layout, const void *input); void evergreen_init_atom_start_compute_cs(struct r600_context *rctx); void evergreen_init_compute_state_functions(struct r600_context *rctx); void evergreen_emit_cs_shader(struct r600_context *rctx, struct r600_atom * atom); struct r600_resource* r600_compute_buffer_alloc_vram(struct r600_screen *screen, unsigned size); struct pipe_resource *r600_compute_global_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ); -void r600_compute_global_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *res); -void *r600_compute_global_transfer_map( - struct pipe_context *ctx_, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box, - struct pipe_transfer **ptransfer); -void r600_compute_global_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer); -void r600_compute_global_transfer_flush_region( struct pipe_context *, struct pipe_transfer *, const struct pipe_box *); -void r600_compute_global_transfer_inline_write( struct pipe_context *, struct pipe_resource *, unsigned level, - unsigned usage, const struct pipe_box *, const void *data, unsigned stride, unsigned layer_stride); #endif diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index c97e34121e3..36b808fbbca 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ 
-281,6 +281,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: case PIPE_CAP_QUERY_MEMORY_INFO: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 1; case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 381ad21a4e3..062c3193947 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -425,8 +425,9 @@ struct r600_common_context { unsigned flags; /* flush flags */ /* Queries. */ - /* The list of active queries. Only one query of each type can be active. */ + /* The list of active queries. */ int num_occlusion_queries; + int num_perfect_occlusion_queries; /* Keep track of non-timer queries, because they should be suspended * during context flushing. * The timer queries (TIME_ELAPSED) shouldn't be suspended for blits, diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index f9a5721fb97..7a2d2ee7f31 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -414,14 +414,22 @@ static void r600_update_occlusion_query_state(struct r600_common_context *rctx, if (type == PIPE_QUERY_OCCLUSION_COUNTER || type == PIPE_QUERY_OCCLUSION_PREDICATE) { bool old_enable = rctx->num_occlusion_queries != 0; - bool enable; + bool old_perfect_enable = + rctx->num_perfect_occlusion_queries != 0; + bool enable, perfect_enable; rctx->num_occlusion_queries += diff; assert(rctx->num_occlusion_queries >= 0); + if (type == PIPE_QUERY_OCCLUSION_COUNTER) { + rctx->num_perfect_occlusion_queries += diff; + assert(rctx->num_perfect_occlusion_queries >= 0); + } + enable = rctx->num_occlusion_queries != 0; + perfect_enable = rctx->num_perfect_occlusion_queries != 0; - if (enable != old_enable) { + if (enable != old_enable || perfect_enable != 
old_perfect_enable) { rctx->set_occlusion_query_state(&rctx->b, enable); } } diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 83fc0021227..4850b73f291 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -329,6 +329,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, struct r600_resource *res = (struct r600_resource*)resource; struct r600_texture *rtex = (struct r600_texture*)resource; struct radeon_bo_metadata metadata; + bool update_metadata = false; /* This is not supported now, but it might be required for OpenCL * interop in the future. @@ -337,29 +338,30 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, (resource->nr_samples > 1 || rtex->is_depth)) return false; - if (!res->is_shared) { - res->is_shared = true; - res->external_usage = usage; - - if (resource->target != PIPE_BUFFER) { - /* Since shader image stores don't support DCC on VI, - * disable it for external clients that want write - * access. - */ - if (usage & PIPE_HANDLE_USAGE_WRITE) - r600_texture_disable_dcc(rscreen, rtex); + if (resource->target != PIPE_BUFFER) { + /* Since shader image stores don't support DCC on VI, + * disable it for external clients that want write + * access. + */ + if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) { + r600_texture_disable_dcc(rscreen, rtex); + update_metadata = true; + } - if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) { - /* Eliminate fast clear (both CMASK and DCC) */ - r600_eliminate_fast_color_clear(rscreen, rtex); + if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && + rtex->cmask.size) { + /* Eliminate fast clear (both CMASK and DCC) */ + r600_eliminate_fast_color_clear(rscreen, rtex); - /* Disable CMASK if flush_resource isn't going - * to be called. - */ - r600_texture_disable_cmask(rscreen, rtex); - } + /* Disable CMASK if flush_resource isn't going + * to be called. 
+ */ + r600_texture_disable_cmask(rscreen, rtex); + update_metadata = true; + } - /* Set metadata. */ + /* Set metadata. */ + if (!res->is_shared || update_metadata) { r600_texture_init_metadata(rtex, &metadata); if (rscreen->query_opaque_metadata) rscreen->query_opaque_metadata(rscreen, rtex, @@ -367,8 +369,18 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, rscreen->ws->buffer_set_metadata(res->buf, &metadata); } + } + + if (res->is_shared) { + /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user + * doesn't set it. + */ + res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH; + if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) + res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH; } else { - assert(res->external_usage == usage); + res->is_shared = true; + res->external_usage = usage; } return rscreen->ws->buffer_get_handle(res->buf, diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c index 474154e52ff..71741325af0 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c @@ -55,6 +55,13 @@ enum radeon_llvm_shader_type { RADEON_LLVM_SHADER_CS = 3, }; +enum radeon_llvm_calling_convention { + RADEON_LLVM_AMDGPU_VS = 87, + RADEON_LLVM_AMDGPU_GS = 88, + RADEON_LLVM_AMDGPU_PS = 89, + RADEON_LLVM_AMDGPU_CS = 90, +}; + void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value) { char str[16]; @@ -71,27 +78,35 @@ void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value) void radeon_llvm_shader_type(LLVMValueRef F, unsigned type) { enum radeon_llvm_shader_type llvm_type; + enum radeon_llvm_calling_convention calling_conv; switch (type) { case TGSI_PROCESSOR_VERTEX: case TGSI_PROCESSOR_TESS_CTRL: case TGSI_PROCESSOR_TESS_EVAL: llvm_type = RADEON_LLVM_SHADER_VS; + calling_conv = RADEON_LLVM_AMDGPU_VS; break; case TGSI_PROCESSOR_GEOMETRY: llvm_type = RADEON_LLVM_SHADER_GS; + calling_conv = 
RADEON_LLVM_AMDGPU_GS; break; case TGSI_PROCESSOR_FRAGMENT: llvm_type = RADEON_LLVM_SHADER_PS; + calling_conv = RADEON_LLVM_AMDGPU_PS; break; case TGSI_PROCESSOR_COMPUTE: llvm_type = RADEON_LLVM_SHADER_CS; + calling_conv = RADEON_LLVM_AMDGPU_CS; break; default: assert(0); } - radeon_llvm_add_attribute(F, "ShaderType", llvm_type); + if (HAVE_LLVM >= 0x309) + LLVMSetFunctionCallConv(F, calling_conv); + else + radeon_llvm_add_attribute(F, "ShaderType", llvm_type); } static void init_r600_target() diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index e0dbec5fb79..c5ea8b17119 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -246,14 +246,14 @@ si_flush_depth_textures(struct si_context *sctx, struct si_textures_info *textures) { unsigned i; - unsigned mask = textures->depth_texture_mask; + uint64_t mask = textures->depth_texture_mask; while (mask) { struct pipe_sampler_view *view; struct si_sampler_view *sview; struct r600_texture *tex; - i = u_bit_scan(&mask); + i = u_bit_scan64(&mask); view = textures->views.views[i]; assert(view); @@ -329,13 +329,13 @@ si_decompress_sampler_color_textures(struct si_context *sctx, struct si_textures_info *textures) { unsigned i; - unsigned mask = textures->compressed_colortex_mask; + uint64_t mask = textures->compressed_colortex_mask; while (mask) { struct pipe_sampler_view *view; struct r600_texture *tex; - i = u_bit_scan(&mask); + i = u_bit_scan64(&mask); view = textures->views.views[i]; assert(view); @@ -355,13 +355,13 @@ si_decompress_image_color_textures(struct si_context *sctx, struct si_images_info *images) { unsigned i; - unsigned mask = images->compressed_colortex_mask; + uint64_t mask = images->compressed_colortex_mask; while (mask) { const struct pipe_image_view *view; struct r600_texture *tex; - i = u_bit_scan(&mask); + i = u_bit_scan64(&mask); view = &images->views[i]; assert(view->resource->target != PIPE_BUFFER); diff --git 
a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 815b87bbd7e..6dd2e4fd89d 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -264,8 +264,8 @@ static void si_set_sampler_views(struct pipe_context *ctx, unsigned slot = start + i; if (!views || !views[i]) { - samplers->depth_texture_mask &= ~(1 << slot); - samplers->compressed_colortex_mask &= ~(1 << slot); + samplers->depth_texture_mask &= ~(1llu << slot); + samplers->compressed_colortex_mask &= ~(1llu << slot); si_set_sampler_view(sctx, &samplers->views, slot, NULL); continue; } @@ -277,18 +277,18 @@ static void si_set_sampler_views(struct pipe_context *ctx, (struct r600_texture*)views[i]->texture; if (rtex->is_depth && !rtex->is_flushing_texture) { - samplers->depth_texture_mask |= 1 << slot; + samplers->depth_texture_mask |= 1llu << slot; } else { - samplers->depth_texture_mask &= ~(1 << slot); + samplers->depth_texture_mask &= ~(1llu << slot); } if (is_compressed_colortex(rtex)) { - samplers->compressed_colortex_mask |= 1 << slot; + samplers->compressed_colortex_mask |= 1llu << slot; } else { - samplers->compressed_colortex_mask &= ~(1 << slot); + samplers->compressed_colortex_mask &= ~(1llu << slot); } } else { - samplers->depth_texture_mask &= ~(1 << slot); - samplers->compressed_colortex_mask &= ~(1 << slot); + samplers->depth_texture_mask &= ~(1llu << slot); + samplers->compressed_colortex_mask &= ~(1llu << slot); } } } @@ -306,9 +306,9 @@ si_samplers_update_compressed_colortex_mask(struct si_textures_info *samplers) struct r600_texture *rtex = (struct r600_texture *)res; if (is_compressed_colortex(rtex)) { - samplers->compressed_colortex_mask |= 1 << i; + samplers->compressed_colortex_mask |= 1llu << i; } else { - samplers->compressed_colortex_mask &= ~(1 << i); + samplers->compressed_colortex_mask &= ~(1llu << i); } } } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c index 407b9e19cc4..41bb84d68df 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -307,6 +307,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: case PIPE_CAP_QUERY_MEMORY_INFO: case PIPE_CAP_TGSI_PACK_HALF_FLOAT: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 1; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: @@ -522,7 +523,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu return 0; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: - return 16; + return SI_NUM_USER_SAMPLERS; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_SUPPORTED_IRS: diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 6d0d687fe4c..4158fc5461e 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -137,8 +137,8 @@ struct si_cs_shader_state { struct si_textures_info { struct si_sampler_views views; - uint32_t depth_texture_mask; /* which textures are depth */ - uint32_t compressed_colortex_mask; + uint64_t depth_texture_mask; /* which textures are depth */ + uint64_t compressed_colortex_mask; }; struct si_images_info { diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 56c575948ab..08da3e37550 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1328,8 +1328,9 @@ static LLVMValueRef fetch_constant( if (reg->Register.Dimension && reg->Dimension.Indirect) { LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS); LLVMValueRef index; - index = get_indirect_index(ctx, &reg->DimIndirect, - reg->Dimension.Index); + index = get_bounded_indirect_index(ctx, &reg->DimIndirect, + reg->Dimension.Index, + SI_NUM_USER_CONST_BUFFERS); bufp = 
build_indexed_load_const(ctx, ptr, index); } else bufp = ctx->const_buffers[buf]; @@ -3356,7 +3357,10 @@ static void tex_fetch_ptrs( const struct tgsi_full_src_register *reg = &emit_data->inst->Src[sampler_src]; LLVMValueRef ind_index; - ind_index = get_indirect_index(ctx, &reg->Indirect, reg->Register.Index); + ind_index = get_bounded_indirect_index(ctx, + &reg->Indirect, + reg->Register.Index, + SI_NUM_USER_SAMPLERS); *res_ptr = get_sampler_desc(ctx, ind_index, DESC_IMAGE); @@ -4278,6 +4282,14 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; + /* The real barrier instruction isn’t needed, because an entire patch + * always fits into a single wave. + */ + if (ctx->type == TGSI_PROCESSOR_TESS_CTRL) { + emit_optimization_barrier(ctx); + return; + } + lp_build_intrinsic(gallivm->builder, HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.s.barrier" : "llvm.AMDGPU.barrier.local", diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 10d691a92f1..8087d2331ff 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -830,25 +830,93 @@ static void si_set_scissor_states(struct pipe_context *ctx, for (i = 0; i < num_scissors; i++) sctx->scissors.states[start_slot + i] = state[i]; + if (!sctx->queued.named.rasterizer || + !sctx->queued.named.rasterizer->scissor_enable) + return; + sctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot; si_mark_atom_dirty(sctx, &sctx->scissors.atom); } +static void si_get_scissor_from_viewport(struct pipe_viewport_state *vp, + struct pipe_scissor_state *scissor) +{ + /* These must be signed, unlike pipe_scissor_state. */ + int minx, miny, maxx, maxy, tmp; + + /* Convert (-1, -1) and (1, 1) from clip space into window space. 
*/ + minx = -vp->scale[0] + vp->translate[0]; + miny = -vp->scale[1] + vp->translate[1]; + maxx = vp->scale[0] + vp->translate[0]; + maxy = vp->scale[1] + vp->translate[1]; + + /* r600_draw_rectangle sets this. Disable the scissor. */ + if (minx == -1 && miny == -1 && maxx == 1 && maxy == 1) { + minx = miny = 0; + maxx = maxy = 16384; + } + + /* Handle inverted viewports. */ + if (minx > maxx) { + tmp = minx; + minx = maxx; + maxx = tmp; + } + if (miny > maxy) { + tmp = miny; + miny = maxy; + maxy = tmp; + } + + scissor->minx = CLAMP(minx, 0, 16384); + scissor->miny = CLAMP(miny, 0, 16384); + scissor->maxx = CLAMP(maxx, 0, 16384); + scissor->maxy = CLAMP(maxy, 0, 16384); +} + +static void si_clip_scissor(struct pipe_scissor_state *out, + struct pipe_scissor_state *clip) +{ + out->minx = MAX2(out->minx, clip->minx); + out->miny = MAX2(out->miny, clip->miny); + out->maxx = MIN2(out->maxx, clip->maxx); + out->maxy = MIN2(out->maxy, clip->maxy); +} + +static void si_emit_one_scissor(struct radeon_winsys_cs *cs, + struct pipe_viewport_state *vp, + struct pipe_scissor_state *scissor) +{ + struct pipe_scissor_state final; + + /* Since the guard band disables clipping, we have to clip per-pixel + * using a scissor. + */ + si_get_scissor_from_viewport(vp, &final); + + if (scissor) + si_clip_scissor(&final, scissor); + + radeon_emit(cs, S_028250_TL_X(final.minx) | + S_028250_TL_Y(final.miny) | + S_028250_WINDOW_OFFSET_DISABLE(1)); + radeon_emit(cs, S_028254_BR_X(final.maxx) | + S_028254_BR_Y(final.maxy)); +} + static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; struct pipe_scissor_state *states = sctx->scissors.states; unsigned mask = sctx->scissors.dirty_mask; + bool scissor_enable = sctx->queued.named.rasterizer->scissor_enable; /* The simple case: Only 1 viewport is active. 
*/ if (mask & 1 && !si_get_vs_info(sctx)->writes_viewport_index) { radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2); - radeon_emit(cs, S_028250_TL_X(states[0].minx) | - S_028250_TL_Y(states[0].miny) | - S_028250_WINDOW_OFFSET_DISABLE(1)); - radeon_emit(cs, S_028254_BR_X(states[0].maxx) | - S_028254_BR_Y(states[0].maxy)); + si_emit_one_scissor(cs, &sctx->viewports.states[0], + scissor_enable ? &states[0] : NULL); sctx->scissors.dirty_mask &= ~1; /* clear one bit */ return; } @@ -861,11 +929,8 @@ static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom) radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + start * 4 * 2, count * 2); for (i = start; i < start+count; i++) { - radeon_emit(cs, S_028250_TL_X(states[i].minx) | - S_028250_TL_Y(states[i].miny) | - S_028250_WINDOW_OFFSET_DISABLE(1)); - radeon_emit(cs, S_028254_BR_X(states[i].maxx) | - S_028254_BR_Y(states[i].maxy)); + si_emit_one_scissor(cs, &sctx->viewports.states[i], + scissor_enable ? 
&states[i] : NULL); } } sctx->scissors.dirty_mask = 0; @@ -883,7 +948,9 @@ static void si_set_viewport_states(struct pipe_context *ctx, sctx->viewports.states[start_slot + i] = state[i]; sctx->viewports.dirty_mask |= ((1 << num_viewports) - 1) << start_slot; + sctx->scissors.dirty_mask |= ((1 << num_viewports) - 1) << start_slot; si_mark_atom_dirty(sctx, &sctx->viewports.atom); + si_mark_atom_dirty(sctx, &sctx->scissors.atom); } static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom) @@ -980,6 +1047,7 @@ static void *si_create_rs_state(struct pipe_context *ctx, return NULL; } + rs->scissor_enable = state->scissor; rs->two_side = state->light_twoside; rs->multisample_enable = state->multisample; rs->force_persample_interp = state->force_persample_interp; @@ -1038,7 +1106,7 @@ static void *si_create_rs_state(struct pipe_context *ctx, S_028A48_MSAA_ENABLE(state->multisample || state->poly_smooth || state->line_smooth) | - S_028A48_VPORT_SCISSOR_ENABLE(state->scissor)); + S_028A48_VPORT_SCISSOR_ENABLE(1)); si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, S_028BE4_PIX_CENTER(state->half_pixel_center) | @@ -1105,6 +1173,11 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) si_mark_atom_dirty(sctx, &sctx->db_render_state); + if (!old_rs || old_rs->scissor_enable != rs->scissor_enable) { + sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; + si_mark_atom_dirty(sctx, &sctx->scissors.atom); + } + si_pm4_bind_state(sctx, rasterizer, rs); si_update_poly_offset_state(sctx); @@ -1310,16 +1383,18 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s /* DB_COUNT_CONTROL (occlusion queries) */ if (sctx->b.num_occlusion_queries > 0) { + bool perfect = sctx->b.num_perfect_occlusion_queries > 0; + if (sctx->b.chip_class >= CIK) { radeon_emit(cs, - S_028004_PERFECT_ZPASS_COUNTS(1) | + S_028004_PERFECT_ZPASS_COUNTS(perfect) | 
S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | S_028004_ZPASS_ENABLE(1) | S_028004_SLICE_EVEN_ENABLE(1) | S_028004_SLICE_ODD_ENABLE(1)); } else { radeon_emit(cs, - S_028004_PERFECT_ZPASS_COUNTS(1) | + S_028004_PERFECT_ZPASS_COUNTS(perfect) | S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples)); } } else { @@ -2000,6 +2075,11 @@ boolean si_is_format_supported(struct pipe_screen *screen, case 4: case 8: break; + case 16: + if (format == PIPE_FORMAT_NONE) + return TRUE; + else + return FALSE; default: return FALSE; } @@ -2623,6 +2703,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, constbuf.user_buffer = sctx->b.sample_locations_16x; break; default: + R600_ERR("Requested an invalid number of samples %i.\n", + sctx->framebuffer.nr_samples); assert(0); } constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index c4d6b9d9eee..f55f19e2918 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -68,6 +68,7 @@ struct si_state_rasterizer { bool uses_poly_offset; bool clamp_fragment_color; bool rasterizer_discard; + bool scissor_enable; }; struct si_dsa_stencil_ref_part { @@ -144,10 +145,10 @@ struct si_shader_data { uint32_t sh_base[SI_NUM_SHADERS]; }; -/* User sampler views: 0..15 - * Polygon stipple tex: 16 +/* User sampler views: 0..31 + * Polygon stipple tex: 32 */ -#define SI_NUM_USER_SAMPLERS 16 /* AKA OpenGL textures units per shader */ +#define SI_NUM_USER_SAMPLERS 32 /* AKA OpenGL textures units per shader */ #define SI_POLY_STIPPLE_SAMPLER SI_NUM_USER_SAMPLERS #define SI_NUM_SAMPLERS (SI_POLY_STIPPLE_SAMPLER + 1) diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index bfd3598fc57..90f29d6e52a 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -270,6 +270,7 @@ softpipe_get_param(struct 
pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; } /* should only get here on unhandled cases */ diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index db4b2735d58..1a4bf384b2a 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -94,6 +94,8 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, sp->framebuffer.width = fb->width; sp->framebuffer.height = fb->height; + sp->framebuffer.samples = fb->samples; + sp->framebuffer.layers = fb->layers; sp->dirty |= SP_NEW_FRAMEBUFFER; } diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c index 0af80cd4296..0ba9313fd5e 100644 --- a/src/gallium/drivers/svga/svga_pipe_blend.c +++ b/src/gallium/drivers/svga/svga_pipe_blend.c @@ -142,6 +142,9 @@ svga_create_blend_state(struct pipe_context *pipe, struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state ); unsigned i; + if (!blend) + return NULL; + /* Fill in the per-rendertarget blend state. We currently only * support independent blend enable and colormask per render target. */ diff --git a/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/src/gallium/drivers/svga/svga_pipe_depthstencil.c index d84ed1df48e..83fcdc3d80b 100644 --- a/src/gallium/drivers/svga/svga_pipe_depthstencil.c +++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c @@ -134,6 +134,9 @@ svga_create_depth_stencil_state(struct pipe_context *pipe, struct svga_context *svga = svga_context(pipe); struct svga_depth_stencil_state *ds = CALLOC_STRUCT( svga_depth_stencil_state ); + if (!ds) + return NULL; + /* Don't try to figure out CW/CCW correspondence with * stencil[0]/[1] at this point. Presumably this can change as * back/front face are modified. 
diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c index 8e0db539574..d397c95da98 100644 --- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -161,6 +161,9 @@ svga_create_rasterizer_state(struct pipe_context *pipe, struct svga_rasterizer_state *rast = CALLOC_STRUCT( svga_rasterizer_state ); struct svga_screen *screen = svga_screen(pipe->screen); + if (!rast) + return NULL; + /* need this for draw module. */ rast->templ = *templ; diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index c0873c0c65a..536fb6f786f 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -404,6 +404,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; } @@ -999,8 +1000,10 @@ svga_screen_create(struct svga_winsys_screen *sws) svgascreen->max_color_buffers = SVGA3D_DX_MAX_RENDER_TARGETS; /* Multisample samples per pixel */ - svgascreen->ms_samples = - get_uint_cap(sws, SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES, 0); + if (debug_get_bool_option("SVGA_MSAA", TRUE)) { + svgascreen->ms_samples = + get_uint_cap(sws, SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES, 0); + } /* Maximum number of constant buffers */ svgascreen->max_const_buffers = diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 57f851833e5..08b1d32afb0 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -1686,6 +1686,44 @@ static void trace_context_set_shader_buffers(struct pipe_context *_context, FREE(_buffers); } +static void trace_context_set_shader_images(struct pipe_context *_context, + unsigned shader, + unsigned start, unsigned nr, + struct 
pipe_image_view *images) +{ + struct trace_context *tr_context = trace_context(_context); + struct pipe_context *context = tr_context->pipe; + struct pipe_image_view *_images = NULL; + + trace_dump_call_begin("pipe_context", "set_shader_images"); + trace_dump_arg(ptr, context); + trace_dump_arg(uint, shader); + trace_dump_arg(uint, start); + trace_dump_arg_begin("images"); + trace_dump_struct_array(image_view, images, nr); + trace_dump_arg_end(); + trace_dump_call_end(); + + if (images) { + int i; + + _images = MALLOC(nr * sizeof(struct pipe_image_view)); + if (!_images) + return; + + for (i = 0; i < nr; i++) { + _images[i] = images[i]; + _images[i].resource = trace_resource_unwrap(tr_context, + _images[i].resource); + } + } + + context->set_shader_images(context, shader, start, nr, _images); + + if (_images) + FREE(_images); +} + static void trace_context_launch_grid(struct pipe_context *_pipe, const struct pipe_grid_info *info) { @@ -1809,6 +1847,7 @@ trace_context_create(struct trace_screen *tr_scr, TR_CTX_INIT(set_tess_state); TR_CTX_INIT(set_shader_buffers); TR_CTX_INIT(launch_grid); + TR_CTX_INIT(set_shader_images); TR_CTX_INIT(transfer_map); TR_CTX_INIT(transfer_unmap); diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index b53d7dbec2f..591ca79a2fa 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -481,6 +481,8 @@ void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state) trace_dump_member(uint, state, width); trace_dump_member(uint, state, height); + trace_dump_member(uint, state, samples); + trace_dump_member(uint, state, layers); trace_dump_member(uint, state, nr_cbufs); trace_dump_member_array(ptr, state, cbufs); trace_dump_member(ptr, state, zsbuf); @@ -738,6 +740,46 @@ void trace_dump_shader_buffer(const struct pipe_shader_buffer *state) } +void trace_dump_image_view(const struct pipe_image_view *state) +{ + if 
(!trace_dumping_enabled_locked()) + return; + + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_image_view"); + trace_dump_member(resource_ptr, state, resource); + trace_dump_member(uint, state, format); + trace_dump_member(uint, state, access); + + trace_dump_member_begin("u"); + trace_dump_struct_begin(""); /* anonymous */ + if (state->resource->target == PIPE_BUFFER) { + trace_dump_member_begin("buf"); + trace_dump_struct_begin(""); /* anonymous */ + trace_dump_member(uint, &state->u.buf, first_element); + trace_dump_member(uint, &state->u.buf, last_element); + trace_dump_struct_end(); /* anonymous */ + trace_dump_member_end(); /* buf */ + } else { + trace_dump_member_begin("tex"); + trace_dump_struct_begin(""); /* anonymous */ + trace_dump_member(uint, &state->u.tex, first_layer); + trace_dump_member(uint, &state->u.tex, last_layer); + trace_dump_member(uint, &state->u.tex, level); + trace_dump_struct_end(); /* anonymous */ + trace_dump_member_end(); /* tex */ + } + trace_dump_struct_end(); /* anonymous */ + trace_dump_member_end(); /* u */ + + trace_dump_struct_end(); +} + + void trace_dump_draw_info(const struct pipe_draw_info *state) { if (!trace_dumping_enabled_locked()) diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index ee0720d8ac8..fd2bc503052 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -91,4 +91,6 @@ void trace_dump_query_result(unsigned query_type, void trace_dump_grid_info(const struct pipe_grid_info *state); +void trace_dump_image_view(const struct pipe_image_view *view); + #endif /* TR_STATE_H */ diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 92d910ba6a5..167a2f5bd8e 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -207,6 +207,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case 
PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; /* Stream output. */ diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c index 8126bdec40c..5a5afc1712f 100644 --- a/src/gallium/drivers/virgl/virgl_screen.c +++ b/src/gallium/drivers/virgl/virgl_screen.c @@ -239,6 +239,7 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; case PIPE_CAP_VENDOR_ID: return 0x1af4; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 6f30f9ed7d3..5e204a3e5ea 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -690,6 +690,7 @@ enum pipe_cap PIPE_CAP_PCI_BUS, PIPE_CAP_PCI_DEVICE, PIPE_CAP_PCI_FUNCTION, + PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT, }; #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0) diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 5ab53728e82..9e466cefd8c 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -57,7 +57,7 @@ extern "C" { #define PIPE_MAX_CLIP_PLANES 8 #define PIPE_MAX_COLOR_BUFS 8 #define PIPE_MAX_CONSTANT_BUFFERS 32 -#define PIPE_MAX_SAMPLERS 18 /* 16 public + 2 driver internal */ +#define PIPE_MAX_SAMPLERS 32 #define PIPE_MAX_SHADER_INPUTS 80 /* 32 GENERIC + 32 PATCH + 16 others */ #define PIPE_MAX_SHADER_OUTPUTS 80 /* 32 GENERIC + 32 PATCH + 16 others */ #define PIPE_MAX_SHADER_SAMPLER_VIEWS 32 @@ -298,9 +298,17 @@ struct pipe_stencil_ref }; +/** + * Note that pipe_surfaces are "texture views for rendering" + * and so in the case of ARB_framebuffer_no_attachment there + * is no pipe_surface state available such that we may + * extract the number of samples and layers. 
+ */ struct pipe_framebuffer_state { unsigned width, height; + unsigned samples; /**< Number of samples in a no-attachment framebuffer */ + unsigned layers; /**< Number of layers in a no-attachment framebuffer */ /** multiple color buffers for multiple render targets */ unsigned nr_cbufs; diff --git a/src/mesa/drivers/dri/common/drirc b/src/mesa/drivers/dri/common/drirc index 183a1dcabe8..3912d8b8c7e 100644 --- a/src/mesa/drivers/dri/common/drirc +++ b/src/mesa/drivers/dri/common/drirc @@ -88,5 +88,13 @@ TODO: document the other workarounds. <application name="Second Life" executable="do-not-directly-run-secondlife-bin"> <option name="allow_glsl_extension_directive_midshader" value="true" /> </application> + + <application name="Warsow (32-bit)" executable="warsow.i386"> + <option name="allow_glsl_extension_directive_midshader" value="true" /> + </application> + + <application name="Warsow (64-bit)" executable="warsow.x86_64"> + <option name="allow_glsl_extension_directive_midshader" value="true" /> + </application> </device> </driconf> diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h index 405020b77e5..5b770aa7af1 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.h +++ b/src/mesa/drivers/dri/i965/brw_cfg.h @@ -121,24 +121,36 @@ bblock_end_const(const struct bblock_t *block) static inline struct bblock_t * bblock_next(struct bblock_t *block) { + if (exec_node_is_tail_sentinel(block->link.next)) + return NULL; + return (struct bblock_t *)block->link.next; } static inline const struct bblock_t * bblock_next_const(const struct bblock_t *block) { + if (exec_node_is_tail_sentinel(block->link.next)) + return NULL; + return (const struct bblock_t *)block->link.next; } static inline struct bblock_t * bblock_prev(struct bblock_t *block) { + if (exec_node_is_head_sentinel(block->link.prev)) + return NULL; + return (struct bblock_t *)block->link.prev; } static inline const struct bblock_t * bblock_prev_const(const struct bblock_t *block) { + 
if (exec_node_is_head_sentinel(block->link.prev)) + return NULL; + return (const struct bblock_t *)block->link.prev; } diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index 231e0001d54..a42583bb477 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -403,6 +403,12 @@ struct brw_wm_prog_data { uint32_t barycentric_interp_modes; /** + * Mask of which FS inputs are marked flat by the shader source. This is + * needed for setting up 3DSTATE_SF/SBE. + */ + uint32_t flat_inputs; + + /** * Map from gl_varying_slot to the position within the FS setup data * payload where the varying's attribute vertex deltas should be delivered. * For varying slots that are not used by the FS, the value is -1. diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp index 2c1abaf255c..114dc6cb212 100644 --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp @@ -42,6 +42,10 @@ dead_control_flow_eliminate(backend_shader *s) foreach_block_safe (block, s->cfg) { bblock_t *prev_block = block->prev(); + + if (!prev_block) + continue; + backend_instruction *const inst = block->start(); backend_instruction *const prev_inst = prev_block->end(); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 33c4adc4705..3f307f4ef70 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2822,17 +2822,15 @@ fs_visitor::emit_repclear_shader() int color_mrf = base_mrf + 2; fs_inst *mov; - if (uniforms == 1) { + if (uniforms > 0) { mov = bld.exec_all().group(4, 0) .MOV(brw_message_reg(color_mrf), fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)); } else { struct brw_reg reg = - brw_reg(BRW_GENERAL_REGISTER_FILE, - 2, 3, 0, 0, BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_8, - BRW_WIDTH_2, - BRW_HORIZONTAL_STRIDE_4, 
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + brw_reg(BRW_GENERAL_REGISTER_FILE, 2, 3, 0, 0, BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_4, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); mov = bld.exec_all().group(4, 0) .MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg)); @@ -2865,7 +2863,7 @@ fs_visitor::emit_repclear_shader() assign_curb_setup(); /* Now that we have the uniform assigned, go ahead and force it to a vec4. */ - if (uniforms == 1) { + if (uniforms > 0) { assert(mov->src[0].file == FIXED_GRF); mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0); } @@ -5614,6 +5612,31 @@ brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo, return barycentric_interp_modes; } +static void +brw_compute_flat_inputs(struct brw_wm_prog_data *prog_data, + bool shade_model_flat, const nir_shader *shader) +{ + prog_data->flat_inputs = 0; + + nir_foreach_variable(var, &shader->inputs) { + enum glsl_interp_qualifier interp_qualifier = + (enum glsl_interp_qualifier)var->data.interpolation; + bool is_gl_Color = (var->data.location == VARYING_SLOT_COL0) || + (var->data.location == VARYING_SLOT_COL1); + + int input_index = prog_data->urb_setup[var->data.location]; + + if (input_index < 0) + continue; + + /* flat shading */ + if (interp_qualifier == INTERP_QUALIFIER_FLAT || + (shade_model_flat && is_gl_Color && + interp_qualifier == INTERP_QUALIFIER_NONE)) + prog_data->flat_inputs |= (1 << input_index); + } +} + static uint8_t computed_depth_mode(const nir_shader *shader) { @@ -5698,6 +5721,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, } } + /* We have to compute the flat inputs after the visitor is finished running + * because it relies on prog_data->urb_setup which is computed in + * fs_visitor::calculate_urb_setup(). 
+ */ + brw_compute_flat_inputs(prog_data, key->flat_shade, shader); + cfg_t *simd8_cfg; int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || use_rep_send; if ((no_simd8 || compiler->devinfo->gen < 5) && simd16_cfg) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 60d58b19ef2..b27b170ebc3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -3079,7 +3079,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, tex_offset; - /* Our hardware requires a LOD for buffer textures */ + /* The hardware requires a LOD for buffer textures */ if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) lod = brw_imm_d(0); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 736deb443dd..376cb258232 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -938,7 +938,7 @@ static void adjust_later_block_ips(bblock_t *start_block, int ip_adjustment) { for (bblock_t *block_iter = start_block->next(); - !block_iter->link.is_tail_sentinel(); + block_iter; block_iter = block_iter->next()) { block_iter->start_ip += ip_adjustment; block_iter->end_ip += ip_adjustment; diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 783af78479e..2dc0a0da45b 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -368,7 +368,6 @@ void calculate_attr_overrides(const struct brw_context *brw, uint16_t *attr_overrides, uint32_t *point_sprite_enables, - uint32_t *flat_enables, uint32_t *urb_entry_read_length, uint32_t *urb_entry_read_offset); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 4cb03adb2bf..b63e44a3bfb 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1724,7 +1724,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) nir_tex_instr_dest_size(instr)); dst_reg dest = get_nir_dest(instr->dest, instr->dest_type); - /* Our hardware requires a LOD for buffer textures */ + /* The hardware requires a LOD for buffer textures */ if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) lod = brw_imm_d(0); diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 42f9a5ca8b6..4fdcb8d80e5 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -155,14 +155,12 @@ void calculate_attr_overrides(const struct brw_context *brw, uint16_t *attr_overrides, uint32_t *point_sprite_enables, - uint32_t *flat_enables, uint32_t *urb_entry_read_length, uint32_t *urb_entry_read_offset) { uint32_t max_source_attr = 0; *point_sprite_enables = 0; - *flat_enables = 0; *urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; @@ -180,9 +178,6 @@ calculate_attr_overrides(const struct brw_context *brw, *urb_entry_read_offset = fs_needs_vue_header ? 
0 : 1; - /* _NEW_LIGHT */ - bool shade_model_flat = brw->ctx.Light.ShadeModel == GL_FLAT; - /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE, * description of dw10 Point Sprite Texture Coordinate Enable: * @@ -208,10 +203,6 @@ calculate_attr_overrides(const struct brw_context *brw, memset(attr_overrides, 0, 16*sizeof(*attr_overrides)); for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { - /* BRW_NEW_FRAGMENT_PROGRAM */ - enum glsl_interp_qualifier interp_qualifier = - brw->fragment_program->InterpQualifier[attr]; - bool is_gl_Color = attr == VARYING_SLOT_COL0 || attr == VARYING_SLOT_COL1; /* BRW_NEW_FS_PROG_DATA */ int input_index = brw->wm.prog_data->urb_setup[attr]; @@ -234,12 +225,6 @@ calculate_attr_overrides(const struct brw_context *brw, *point_sprite_enables |= (1 << input_index); } - /* flat shading */ - if (interp_qualifier == INTERP_QUALIFIER_FLAT || - (shade_model_flat && is_gl_Color && - interp_qualifier == INTERP_QUALIFIER_NONE)) - *flat_enables |= (1 << input_index); - /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */ uint16_t attr_override = point_sprite ? 
0 : get_attr_override(&brw->vue_map_geom_out, @@ -285,7 +270,6 @@ upload_sf_state(struct brw_context *brw) uint32_t num_outputs = brw->wm.prog_data->num_varying_inputs; uint32_t dw1, dw2, dw3, dw4; uint32_t point_sprite_enables; - uint32_t flat_enables; int i; /* _NEW_BUFFER */ bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); @@ -428,8 +412,7 @@ upload_sf_state(struct brw_context *brw) uint32_t urb_entry_read_length; uint32_t urb_entry_read_offset; calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables, - &flat_enables, &urb_entry_read_length, - &urb_entry_read_offset); + &urb_entry_read_length, &urb_entry_read_offset); dw1 |= (urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT); @@ -446,7 +429,7 @@ upload_sf_state(struct brw_context *brw) OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16); } OUT_BATCH(point_sprite_enables); /* dw16 */ - OUT_BATCH(flat_enables); + OUT_BATCH(brw->wm.prog_data->flat_inputs); OUT_BATCH(0); /* wrapshortest enables 0-7 */ OUT_BATCH(0); /* wrapshortest enables 8-15 */ ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 7c98c73edf8..c76789fa252 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -38,7 +38,6 @@ upload_sbe_state(struct brw_context *brw) uint32_t num_outputs = brw->wm.prog_data->num_varying_inputs; uint32_t dw1; uint32_t point_sprite_enables; - uint32_t flat_enables; int i; uint16_t attr_overrides[16]; /* _NEW_BUFFERS */ @@ -66,8 +65,7 @@ upload_sbe_state(struct brw_context *brw) uint32_t urb_entry_read_length; uint32_t urb_entry_read_offset; calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables, - &flat_enables, &urb_entry_read_length, - &urb_entry_read_offset); + &urb_entry_read_length, &urb_entry_read_offset); dw1 |= urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 
| urb_entry_read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT; @@ -81,7 +79,7 @@ upload_sbe_state(struct brw_context *brw) } OUT_BATCH(point_sprite_enables); /* dw10 */ - OUT_BATCH(flat_enables); + OUT_BATCH(brw->wm.prog_data->flat_inputs); OUT_BATCH(0); /* wrapshortest enables 0-7 */ OUT_BATCH(0); /* wrapshortest enables 8-15 */ ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c index 2ac21f7c873..5a97c1d0e90 100644 --- a/src/mesa/drivers/dri/i965/gen8_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c @@ -39,7 +39,6 @@ upload_sbe(struct brw_context *brw) uint32_t urb_entry_read_length; uint32_t urb_entry_read_offset; uint32_t point_sprite_enables; - uint32_t flat_enables; int sbe_cmd_length; uint32_t dw1 = @@ -66,7 +65,6 @@ upload_sbe(struct brw_context *brw) */ calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables, - &flat_enables, &urb_entry_read_length, &urb_entry_read_offset); @@ -109,7 +107,7 @@ upload_sbe(struct brw_context *brw) OUT_BATCH(_3DSTATE_SBE << 16 | (sbe_cmd_length - 2)); OUT_BATCH(dw1); OUT_BATCH(point_sprite_enables); - OUT_BATCH(flat_enables); + OUT_BATCH(brw->wm.prog_data->flat_inputs); if (sbe_cmd_length >= 6) { OUT_BATCH(dw4); OUT_BATCH(dw5); diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index bb8d4c3112b..c81f5a083eb 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1369,6 +1369,11 @@ _mesa_BindRenderbufferEXT(GLenum target, GLuint renderbuffer) bind_renderbuffer(target, renderbuffer, true); } +/** + * ARB_framebuffer_no_attachment - Application passes requested param's + * here. NOTE: NumSamples requested need not be _NumSamples which is + * what the hw supports. 
+ */ static void framebuffer_parameteri(struct gl_context *ctx, struct gl_framebuffer *fb, GLenum pname, GLint param, const char *func) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 1fbda420401..6c09948af04 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2295,30 +2295,6 @@ struct gl_shader */ unsigned num_combined_uniform_components; - /** - * This shader's uniform/ssbo block information. - * - * These fields are only set post-linking. - * - * BufferInterfaceBlocks is a list containing both UBOs and SSBOs. This is - * useful during the linking process so that we don't have to handle SSBOs - * specifically. - * - * UniformBlocks is a list of UBOs. This is useful for backends that need - * or prefer to see separate index spaces for UBOS and SSBOs like the GL - * API specifies. - * - * ShaderStorageBlocks is a list of SSBOs. This is useful for backends that - * need or prefer to see separate index spaces for UBOS and SSBOs like the - * GL API specifies. - * - * UniformBlocks and ShaderStorageBlocks only have pointers into - * BufferInterfaceBlocks so the actual resource information is not - * duplicated. - */ - unsigned NumBufferInterfaceBlocks; - struct gl_uniform_block **BufferInterfaceBlocks; - unsigned NumUniformBlocks; struct gl_uniform_block **UniformBlocks; @@ -2529,11 +2505,6 @@ struct gl_uniform_block */ GLuint UniformBufferSize; - /** - * Is this actually an interface block for a shader storage buffer? - */ - bool IsShaderStorage; - /** Stages that reference this block */ uint8_t stageref; @@ -2809,33 +2780,11 @@ struct gl_shader_program */ unsigned LastClipDistanceArraySize; - /** - * This shader's uniform/ssbo block information. - * - * BufferInterfaceBlocks is a list containing both UBOs and SSBOs. This is - * useful during the linking process so that we don't have to handle SSBOs - * specifically. - * - * UniformBlocks is a list of UBOs. 
This is useful for backends that need - * or prefer to see separate index spaces for UBOS and SSBOs like the GL - * API specifies. - * - * ShaderStorageBlocks is a list of SSBOs. This is useful for backends that - * need or prefer to see separate index spaces for UBOS and SSBOs like the - * GL API specifies. - * - * UniformBlocks and ShaderStorageBlocks only have pointers into - * BufferInterfaceBlocks so the actual resource information is not - * duplicated and are only set after linking. - */ - unsigned NumBufferInterfaceBlocks; - struct gl_uniform_block *BufferInterfaceBlocks; - unsigned NumUniformBlocks; - struct gl_uniform_block **UniformBlocks; + struct gl_uniform_block *UniformBlocks; unsigned NumShaderStorageBlocks; - struct gl_uniform_block **ShaderStorageBlocks; + struct gl_uniform_block *ShaderStorageBlocks; /** * Map of active uniform names to locations diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 4ef6a81204e..2c1a6ee3505 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -925,8 +925,11 @@ is_resource_referenced(struct gl_shader_program *shProg, if (res->Type == GL_ATOMIC_COUNTER_BUFFER) return RESOURCE_ATC(res)->StageReferences[stage]; - if (res->Type == GL_UNIFORM_BLOCK || res->Type == GL_SHADER_STORAGE_BLOCK) - return shProg->BufferInterfaceBlocks[index].stageref & (1 << stage); + if (res->Type == GL_UNIFORM_BLOCK) + return shProg->UniformBlocks[index].stageref & (1 << stage); + + if (res->Type == GL_SHADER_STORAGE_BLOCK) + return shProg->ShaderStorageBlocks[index].stageref & (1 << stage); return res->StageReferences & (1 << stage); } diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index ba2607221d9..b28b5ce5457 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -727,7 +727,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, for (i = 0; i < shProg->NumUniformBlocks; i++) { /* Add one for the terminating NUL character. 
*/ - const GLint len = strlen(shProg->UniformBlocks[i]->Name) + 1; + const GLint len = strlen(shProg->UniformBlocks[i].Name) + 1; if (len > max_len) max_len = len; diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index 8b9166ceecb..274cb129b07 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -292,9 +292,13 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg) ralloc_free(shProg->InfoLog); shProg->InfoLog = ralloc_strdup(shProg, ""); - ralloc_free(shProg->BufferInterfaceBlocks); - shProg->BufferInterfaceBlocks = NULL; - shProg->NumBufferInterfaceBlocks = 0; + ralloc_free(shProg->UniformBlocks); + shProg->UniformBlocks = NULL; + shProg->NumUniformBlocks = 0; + + ralloc_free(shProg->ShaderStorageBlocks); + shProg->ShaderStorageBlocks = NULL; + shProg->NumShaderStorageBlocks = 0; ralloc_free(shProg->AtomicBuffers); shProg->AtomicBuffers = NULL; diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index 7dcbdccf442..a9308d09f69 100644 --- a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -1016,13 +1016,13 @@ _mesa_UniformBlockBinding(GLuint program, return; } - if (shProg->UniformBlocks[uniformBlockIndex]->Binding != + if (shProg->UniformBlocks[uniformBlockIndex].Binding != uniformBlockBinding) { FLUSH_VERTICES(ctx, 0); ctx->NewDriverState |= ctx->DriverFlags.NewUniformBuffer; - shProg->UniformBlocks[uniformBlockIndex]->Binding = uniformBlockBinding; + shProg->UniformBlocks[uniformBlockIndex].Binding = uniformBlockBinding; } } @@ -1059,13 +1059,13 @@ _mesa_ShaderStorageBlockBinding(GLuint program, return; } - if (shProg->ShaderStorageBlocks[shaderStorageBlockIndex]->Binding != + if (shProg->ShaderStorageBlocks[shaderStorageBlockIndex].Binding != shaderStorageBlockBinding) { FLUSH_VERTICES(ctx, 0); ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer; - shProg->ShaderStorageBlocks[shaderStorageBlockIndex]->Binding = + shProg->ShaderStorageBlocks[shaderStorageBlockIndex].Binding = 
shaderStorageBlockBinding; } } diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c index ae883a2535e..ade3d61dc70 100644 --- a/src/mesa/state_tracker/st_atom_framebuffer.c +++ b/src/mesa/state_tracker/st_atom_framebuffer.c @@ -43,6 +43,7 @@ #include "util/u_math.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "main/framebuffer.h" /** @@ -64,6 +65,41 @@ update_framebuffer_size(struct pipe_framebuffer_state *framebuffer, framebuffer->height = MIN2(framebuffer->height, surface->height); } +/** + * Round up the requested multisample count to the next supported sample size. + */ +static unsigned +framebuffer_quantize_num_samples(struct st_context *st, unsigned num_samples) +{ + struct pipe_screen *screen = st->pipe->screen; + int quantized_samples = 0; + unsigned msaa_mode; + + if (!num_samples) + return 0; + + /* Assumes the highest supported MSAA is a power of 2 */ + msaa_mode = util_next_power_of_two(st->ctx->Const.MaxFramebufferSamples); + assert(!(num_samples > msaa_mode)); /* be safe from infinite loops */ + + /** + * Check if the MSAA mode that is higher than the requested + * num_samples is supported, and if so returning it. + */ + for (; msaa_mode >= num_samples; msaa_mode = msaa_mode / 2) { + /** + * For ARB_framebuffer_no_attachment, A format of + * PIPE_FORMAT_NONE implies what number of samples is + * supported for a framebuffer with no attachment. Thus the + * drivers callback must be adjusted for this. + */ + if (screen->is_format_supported(screen, PIPE_FORMAT_NONE, + PIPE_TEXTURE_2D, msaa_mode, + PIPE_BIND_RENDER_TARGET)) + quantized_samples = msaa_mode; + } + return quantized_samples; +} /** * Update framebuffer state (color, depth, stencil, etc. 
buffers) @@ -79,10 +115,22 @@ update_framebuffer_state( struct st_context *st ) st_flush_bitmap_cache(st); st->state.fb_orientation = st_fb_orientation(fb); - framebuffer->width = UINT_MAX; - framebuffer->height = UINT_MAX; - /*printf("------ fb size %d x %d\n", fb->Width, fb->Height);*/ + /** + * Quantize the derived default number of samples: + * + * A query to the driver of supported MSAA values the + * hardware supports is done as to legalize the number + * of application requested samples, NumSamples. + * See commit eb9cf3c for more information. + */ + fb->DefaultGeometry._NumSamples = + framebuffer_quantize_num_samples(st, fb->DefaultGeometry.NumSamples); + + framebuffer->width = _mesa_geometric_width(fb); + framebuffer->height = _mesa_geometric_height(fb); + framebuffer->samples = _mesa_geometric_samples(fb); + framebuffer->layers = _mesa_geometric_layers(fb); /* Examine Mesa's ctx->DrawBuffer->_ColorDrawBuffers state * to determine which surfaces to draw to diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index 366163e42df..ed9deb03327 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -244,7 +244,7 @@ static void update_raster_state( struct st_context *st ) _mesa_is_multisample_enabled(ctx) && ctx->Multisample.SampleShading && ctx->Multisample.MinSampleShadingValue * - ctx->DrawBuffer->Visual.samples > 1; + _mesa_geometric_samples(ctx->DrawBuffer) > 1; /* _NEW_SCISSOR */ raster->scissor = ctx->Scissor.EnableFlags; diff --git a/src/mesa/state_tracker/st_atom_scissor.c b/src/mesa/state_tracker/st_atom_scissor.c index 4ebe799e35d..605d5cba9e7 100644 --- a/src/mesa/state_tracker/st_atom_scissor.c +++ b/src/mesa/state_tracker/st_atom_scissor.c @@ -32,6 +32,7 @@ #include "main/macros.h" +#include "main/framebuffer.h" #include "st_context.h" #include "pipe/p_context.h" #include "st_atom.h" @@ -46,14 +47,17 @@ update_scissor( struct st_context *st ) 
struct pipe_scissor_state scissor[PIPE_MAX_VIEWPORTS]; const struct gl_context *ctx = st->ctx; const struct gl_framebuffer *fb = ctx->DrawBuffer; + const unsigned int fb_width = _mesa_geometric_width(fb); + const unsigned int fb_height = _mesa_geometric_height(fb); GLint miny, maxy; unsigned i; bool changed = false; + for (i = 0 ; i < ctx->Const.MaxViewports; i++) { scissor[i].minx = 0; scissor[i].miny = 0; - scissor[i].maxx = fb->Width; - scissor[i].maxy = fb->Height; + scissor[i].maxx = fb_width; + scissor[i].maxy = fb_height; if (ctx->Scissor.EnableFlags & (1 << i)) { /* need to be careful here with xmax or ymax < 0 */ diff --git a/src/mesa/state_tracker/st_cb_drawtex.c b/src/mesa/state_tracker/st_cb_drawtex.c index a7926295277..e2af2357f02 100644 --- a/src/mesa/state_tracker/st_cb_drawtex.c +++ b/src/mesa/state_tracker/st_cb_drawtex.c @@ -16,6 +16,7 @@ #include "main/image.h" #include "main/macros.h" #include "main/teximage.h" +#include "main/framebuffer.h" #include "program/program.h" #include "program/prog_print.h" @@ -166,8 +167,8 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, /* positions (in clip coords) */ { const struct gl_framebuffer *fb = ctx->DrawBuffer; - const GLfloat fb_width = (GLfloat)fb->Width; - const GLfloat fb_height = (GLfloat)fb->Height; + const GLfloat fb_width = (GLfloat)_mesa_geometric_width(fb); + const GLfloat fb_height = (GLfloat)_mesa_geometric_height(fb); const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0); const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0); @@ -262,8 +263,8 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, { const struct gl_framebuffer *fb = ctx->DrawBuffer; const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); - const GLfloat width = (GLfloat)fb->Width; - const GLfloat height = (GLfloat)fb->Height; + const GLfloat width = (GLfloat)_mesa_geometric_width(fb); + const GLfloat height = (GLfloat)_mesa_geometric_height(fb); struct 
pipe_viewport_state vp; vp.scale[0] = 0.5f * width; vp.scale[1] = height * (invert ? -0.5f : 0.5f); diff --git a/src/mesa/state_tracker/st_cb_msaa.c b/src/mesa/state_tracker/st_cb_msaa.c index d581f2121b0..22001e49973 100644 --- a/src/mesa/state_tracker/st_cb_msaa.c +++ b/src/mesa/state_tracker/st_cb_msaa.c @@ -27,6 +27,7 @@ #include "main/bufferobj.h" #include "main/imports.h" +#include "main/framebuffer.h" #include "state_tracker/st_cb_msaa.h" #include "state_tracker/st_context.h" @@ -47,7 +48,8 @@ st_GetSamplePosition(struct gl_context *ctx, st_validate_state(st, ST_PIPELINE_RENDER); if (st->pipe->get_sample_position) - st->pipe->get_sample_position(st->pipe, (unsigned) fb->Visual.samples, + st->pipe->get_sample_position(st->pipe, + _mesa_geometric_samples(fb), index, outPos); else outPos[0] = outPos[1] = 0.5f; diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 6c0df8d2a98..287894317df 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -445,6 +445,18 @@ void st_init_limits(struct pipe_screen *screen, extensions->ARB_shader_image_load_store = GL_TRUE; extensions->ARB_shader_image_size = GL_TRUE; } + + /* ARB_framebuffer_no_attachments */ + c->MaxFramebufferWidth = c->MaxViewportWidth; + c->MaxFramebufferHeight = c->MaxViewportHeight; + /* NOTE: we cheat here a little by assuming that + * PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS has the same + * number of layers as we need, although we technically + * could have more the generality is not really useful + * in practicality. 
+ */ + c->MaxFramebufferLayers = + screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS); } @@ -956,6 +968,9 @@ void st_init_extensions(struct pipe_screen *screen, enum pipe_format int_formats[] = { PIPE_FORMAT_R8G8B8A8_SINT }; + enum pipe_format void_formats[] = { + PIPE_FORMAT_NONE + }; consts->MaxSamples = get_max_samples_for_formats(screen, ARRAY_SIZE(color_formats), @@ -976,6 +991,12 @@ void st_init_extensions(struct pipe_screen *screen, get_max_samples_for_formats(screen, ARRAY_SIZE(int_formats), int_formats, consts->MaxSamples, PIPE_BIND_SAMPLER_VIEW); + + /* ARB_framebuffer_no_attachments, assume max no. of samples 32 */ + consts->MaxFramebufferSamples = + get_max_samples_for_formats(screen, ARRAY_SIZE(void_formats), + void_formats, 32, + PIPE_BIND_RENDER_TARGET); } if (consts->MaxSamples == 1) { /* one sample doesn't really make sense */ @@ -1068,6 +1089,13 @@ void st_init_extensions(struct pipe_screen *screen, extensions->AMD_vertex_shader_viewport_index = GL_TRUE; } + /* ARB_framebuffer_no_attachments */ + if (screen->get_param(screen, PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT) && + ((consts->MaxSamples >= 4 && consts->MaxFramebufferLayers >= 2048) || + (consts->MaxFramebufferSamples >= consts->MaxSamples && + consts->MaxFramebufferLayers >= consts->MaxArrayTextureLayers))) + extensions->ARB_framebuffer_no_attachments = GL_TRUE; + /* GL_ARB_ES3_compatibility. * * Assume that ES3 is supported if GLSL 3.30 is supported. 
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index cd481c166e7..b9ab7ae9919 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -389,7 +389,7 @@ public: unsigned num_output_arrays; int num_address_regs; - int samplers_used; + uint32_t samplers_used; glsl_base_type sampler_types[PIPE_MAX_SAMPLERS]; int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */ int buffers_used; @@ -4290,6 +4290,8 @@ glsl_to_tgsi_visitor::visit(ir_barrier *ir) glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() { + STATIC_ASSERT(sizeof(samplers_used) * 8 >= PIPE_MAX_SAMPLERS); + result.file = PROGRAM_UNDEFINED; next_temp = 1; array_sizes = NULL; @@ -4346,7 +4348,7 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) if (inst->info->is_tex) { for (int i = 0; i < inst->sampler_array_size; i++) { unsigned idx = inst->sampler_base + i; - v->samplers_used |= 1 << idx; + v->samplers_used |= 1u << idx; debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types)); v->sampler_types[idx] = inst->tex_type; @@ -6325,7 +6327,7 @@ st_translate_program( /* texture samplers */ for (i = 0; i < frag_const->MaxTextureImageUnits; i++) { - if (program->samplers_used & (1 << i)) { + if (program->samplers_used & (1u << i)) { unsigned type; t->samplers[i] = ureg_DECL_sampler(ureg, i); |