From 89f73827d03e28af548cc11247ebd6e2825789af Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 26 Sep 2015 03:13:11 +0200 Subject: ddebug: separate creation of debug files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be used by radeonsi for logging. Reviewed-by: Michel Dänzer --- src/gallium/drivers/ddebug/dd_draw.c | 27 +------------- src/gallium/drivers/ddebug/dd_pipe.h | 4 +- src/gallium/drivers/ddebug/dd_util.h | 71 ++++++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 28 deletions(-) create mode 100644 src/gallium/drivers/ddebug/dd_util.h diff --git a/src/gallium/drivers/ddebug/dd_draw.c b/src/gallium/drivers/ddebug/dd_draw.c index 1c986238708..b443c5b0b03 100644 --- a/src/gallium/drivers/ddebug/dd_draw.c +++ b/src/gallium/drivers/ddebug/dd_draw.c @@ -30,9 +30,6 @@ #include "util/u_dump.h" #include "util/u_format.h" #include "tgsi/tgsi_scan.h" -#include "os/os_process.h" -#include -#include enum call_type @@ -88,33 +85,13 @@ struct dd_call } info; }; - static FILE * dd_get_file_stream(struct dd_context *dctx) { struct pipe_screen *screen = dctx->pipe->screen; - static unsigned index; - char proc_name[128], dir[256], name[512]; - FILE *f; - - if (!os_get_process_name(proc_name, sizeof(proc_name))) { - fprintf(stderr, "dd: can't get the process name\n"); - return NULL; - } - - snprintf(dir, sizeof(dir), "%s/"DD_DIR, debug_get_option("HOME", ".")); - - if (mkdir(dir, 0774) && errno != EEXIST) { - fprintf(stderr, "dd: can't create a directory (%i)\n", errno); - return NULL; - } - - snprintf(name, sizeof(name), "%s/%s_%u_%08u", dir, proc_name, getpid(), index++); - f = fopen(name, "w"); - if (!f) { - fprintf(stderr, "dd: can't open file %s\n", name); + FILE *f = dd_get_debug_file(); + if (!f) return NULL; - } fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen)); fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen)); diff --git a/src/gallium/drivers/ddebug/dd_pipe.h 
b/src/gallium/drivers/ddebug/dd_pipe.h index c78d112988a..34f59203e4b 100644 --- a/src/gallium/drivers/ddebug/dd_pipe.h +++ b/src/gallium/drivers/ddebug/dd_pipe.h @@ -31,9 +31,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "pipe/p_screen.h" - -/* name of the directory in home */ -#define DD_DIR "ddebug_dumps" +#include "dd_util.h" enum dd_mode { DD_DETECT_HANGS, diff --git a/src/gallium/drivers/ddebug/dd_util.h b/src/gallium/drivers/ddebug/dd_util.h new file mode 100644 index 00000000000..c217c8eed68 --- /dev/null +++ b/src/gallium/drivers/ddebug/dd_util.h @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2015 Advanced Micro Devices, Inc. + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef DD_UTIL_H +#define DD_UTIL_H + +#include +#include +#include +#include + +#include "os/os_process.h" +#include "util/u_debug.h" + +/* name of the directory in home */ +#define DD_DIR "ddebug_dumps" + +static inline FILE * +dd_get_debug_file() +{ + static unsigned index; + char proc_name[128], dir[256], name[512]; + FILE *f; + + if (!os_get_process_name(proc_name, sizeof(proc_name))) { + fprintf(stderr, "dd: can't get the process name\n"); + return NULL; + } + + snprintf(dir, sizeof(dir), "%s/"DD_DIR, debug_get_option("HOME", ".")); + + if (mkdir(dir, 0774) && errno != EEXIST) { + fprintf(stderr, "dd: can't create a directory (%i)\n", errno); + return NULL; + } + + snprintf(name, sizeof(name), "%s/%s_%u_%08u", dir, proc_name, getpid(), index++); + f = fopen(name, "w"); + if (!f) { + fprintf(stderr, "dd: can't open file %s\n", name); + return NULL; + } + + return f; +} + +#endif /* DD_UTIL_H */ -- cgit v1.2.3 From 4502d0bf8857d5900882466a69ca8cae2ee5d90e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 26 Sep 2015 03:14:43 +0200 Subject: radeonsi: move dumping the last IB into its own function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: indentation fix Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_debug.c | 51 ++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index d3fd201ae26..ccdc6921896 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -392,6 +392,33 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f) fprintf(f, "\n"); } +static void si_dump_last_ib(struct si_context *sctx, FILE *f) +{ + int last_trace_id = -1; + + if (!sctx->last_ib) + return; + + if (sctx->last_trace_buf) { + /* We are expecting that 
the ddebug pipe has already + * waited for the context, so this buffer should be idle. + * If the GPU is hung, there is no point in waiting for it. + */ + uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf, + NULL, + PIPE_TRANSFER_UNSYNCHRONIZED | + PIPE_TRANSFER_READ); + if (map) + last_trace_id = *map; + } + + si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size, + last_trace_id); + free(sctx->last_ib); /* dump only once */ + sctx->last_ib = NULL; + r600_resource_reference(&sctx->last_trace_buf, NULL); +} + static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, unsigned flags) { @@ -406,29 +433,7 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, si_dump_shader(sctx->gs_shader, "Geometry", f); si_dump_shader(sctx->ps_shader, "Fragment", f); - if (sctx->last_ib) { - int last_trace_id = -1; - - if (sctx->last_trace_buf) { - /* We are expecting that the ddebug pipe has already - * waited for the context, so this buffer should be idle. - * If the GPU is hung, there is no point in waiting for it. 
- */ - uint32_t *map = - sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf, - NULL, - PIPE_TRANSFER_UNSYNCHRONIZED | - PIPE_TRANSFER_READ); - if (map) - last_trace_id = *map; - } - - si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size, - last_trace_id); - free(sctx->last_ib); /* dump only once */ - sctx->last_ib = NULL; - r600_resource_reference(&sctx->last_trace_buf, NULL); - } + si_dump_last_ib(sctx, f); fprintf(f, "Done.\n"); } -- cgit v1.2.3 From 9bd7928a35c27d3d0898db83bc8db823a6dbee5e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 26 Sep 2015 03:15:40 +0200 Subject: radeonsi: add an option for debugging VM faults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeon/r600_pipe_common.c | 1 + src/gallium/drivers/radeon/r600_pipe_common.h | 1 + src/gallium/drivers/radeonsi/si_debug.c | 113 ++++++++++++++++++++++++++ src/gallium/drivers/radeonsi/si_hw_context.c | 4 + src/gallium/drivers/radeonsi/si_pipe.c | 3 + src/gallium/drivers/radeonsi/si_pipe.h | 2 + 6 files changed, 124 insertions(+) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 08839343b74..7ac94caad9f 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -359,6 +359,7 @@ static const struct debug_named_value common_debug_options[] = { { "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." }, { "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." }, { "nowc", DBG_NO_WC, "Disable GTT write combining" }, + { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." 
}, DEBUG_NAMED_VALUE_END /* must be last */ }; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 534b987a2cc..2df93e54559 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -98,6 +98,7 @@ #define DBG_PRECOMPILE (1llu << 39) #define DBG_INFO (1llu << 40) #define DBG_NO_WC (1llu << 41) +#define DBG_CHECK_VM (1llu << 42) #define R600_MAP_BUFFER_ALIGNMENT 64 diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index ccdc6921896..3d127236831 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -28,6 +28,7 @@ #include "si_shader.h" #include "sid.h" #include "sid_tables.h" +#include "ddebug/dd_util.h" static void si_dump_shader(struct si_shader_selector *sel, const char *name, @@ -438,7 +439,119 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, fprintf(f, "Done.\n"); } +static bool si_vm_fault_occured(struct si_context *sctx, uint32_t *out_addr) +{ + char line[2000]; + unsigned sec, usec; + int progress = 0; + uint64_t timestamp = 0; + bool fault = false; + + FILE *p = popen("dmesg", "r"); + if (!p) + return false; + + while (fgets(line, sizeof(line), p)) { + char *msg, len; + + /* Get the timestamp. */ + if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) { + assert(0); + continue; + } + timestamp = sec * 1000000llu + usec; + + /* If just updating the timestamp. */ + if (!out_addr) + continue; + + /* Process messages only if the timestamp is newer. */ + if (timestamp <= sctx->dmesg_timestamp) + continue; + + /* Only process the first VM fault. */ + if (fault) + continue; + + /* Remove trailing \n */ + len = strlen(line); + if (len && line[len-1] == '\n') + line[len-1] = 0; + + /* Get the message part. 
*/ + msg = strchr(line, ']'); + if (!msg) { + assert(0); + continue; + } + msg++; + + switch (progress) { + case 0: + if (strstr(msg, "GPU fault detected:")) + progress = 1; + break; + case 1: + msg = strstr(msg, "VM_CONTEXT1_PROTECTION_FAULT_ADDR"); + if (msg) { + msg = strstr(msg, "0x"); + if (msg) { + msg += 2; + if (sscanf(msg, "%X", out_addr) == 1) + fault = true; + } + } + progress = 0; + break; + default: + progress = 0; + } + } + pclose(p); + + if (timestamp > sctx->dmesg_timestamp) + sctx->dmesg_timestamp = timestamp; + return fault; +} + +void si_check_vm_faults(struct si_context *sctx) +{ + struct pipe_screen *screen = sctx->b.b.screen; + FILE *f; + uint32_t addr; + + /* Use conservative timeout 800ms, after which we won't wait any + * longer and assume the GPU is hung. + */ + screen->fence_finish(screen, sctx->last_gfx_fence, 800*1000*1000); + + if (!si_vm_fault_occured(sctx, &addr)) + return; + + f = dd_get_debug_file(); + if (!f) + return; + + fprintf(f, "VM fault report.\n\n"); + fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen)); + fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen)); + fprintf(f, "Device name: %s\n\n", screen->get_name(screen)); + fprintf(f, "Failing VM page: 0x%08x\n\n", addr); + + si_dump_last_ib(sctx, f); + fclose(f); + + fprintf(stderr, "Detected a VM fault, exiting...\n"); + exit(0); +} + void si_init_debug_functions(struct si_context *sctx) { sctx->b.b.dump_debug_state = si_dump_debug_state; + + /* Set the initial dmesg timestamp for this context, so that + * only new messages will be checked for VM faults. 
+ */ + if (sctx->screen->b.debug_flags & DBG_CHECK_VM) + si_vm_fault_occured(sctx, NULL); } diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 1d5d42657e4..c789292e742 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -103,6 +103,10 @@ void si_context_gfx_flush(void *context, unsigned flags, if (fence) ws->fence_reference(fence, ctx->last_gfx_fence); + /* Check VM faults if needed. */ + if (ctx->screen->b.debug_flags & DBG_CHECK_VM) + si_check_vm_faults(ctx); + si_begin_new_cs(ctx); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 9edee50ac8a..5a2b60620e3 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -107,6 +107,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, if (sctx == NULL) return NULL; + if (sscreen->b.debug_flags & DBG_CHECK_VM) + flags |= PIPE_CONTEXT_DEBUG; + sctx->b.b.screen = screen; /* this must be set first */ sctx->b.b.priv = priv; sctx->b.b.destroy = si_destroy_context; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 847853e59e9..1c26022bb1b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -276,6 +276,7 @@ struct si_context { struct r600_resource *last_trace_buf; struct r600_resource *trace_buf; unsigned trace_id; + uint64_t dmesg_timestamp; }; /* cik_sdma.c */ @@ -310,6 +311,7 @@ void si_init_cp_dma_functions(struct si_context *sctx); /* si_debug.c */ void si_init_debug_functions(struct si_context *sctx); +void si_check_vm_faults(struct si_context *sctx); /* si_dma.c */ void si_dma_copy(struct pipe_context *ctx, -- cgit v1.2.3 From 2edb0606397d16fe88d7b488285df379aaae5893 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 26 Sep 2015 23:18:55 +0200 Subject: gallium/radeon: tell the winsys the 
exact resource binding types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the priority flags and expand them. This information will be used for debugging. Reviewed-by: Michel Dänzer --- src/gallium/drivers/r300/r300_emit.c | 10 ++-- src/gallium/drivers/r600/evergreen_compute.c | 4 +- src/gallium/drivers/r600/evergreen_hw_context.c | 6 +-- src/gallium/drivers/r600/evergreen_state.c | 25 +++++----- src/gallium/drivers/r600/r600_hw_context.c | 8 +-- src/gallium/drivers/r600/r600_state.c | 23 +++++---- src/gallium/drivers/r600/r600_state_common.c | 15 +++--- src/gallium/drivers/radeon/r600_pipe_common.h | 12 +++++ src/gallium/drivers/radeon/r600_query.c | 9 ++-- src/gallium/drivers/radeon/r600_streamout.c | 8 +-- src/gallium/drivers/radeon/radeon_uvd.c | 2 +- src/gallium/drivers/radeon/radeon_vce.c | 2 +- src/gallium/drivers/radeon/radeon_winsys.h | 65 ++++++++++++++++++++----- src/gallium/drivers/radeonsi/cik_sdma.c | 8 +-- src/gallium/drivers/radeonsi/si_compute.c | 8 +-- src/gallium/drivers/radeonsi/si_cp_dma.c | 6 +-- src/gallium/drivers/radeonsi/si_descriptors.c | 37 +++++--------- src/gallium/drivers/radeonsi/si_dma.c | 8 +-- src/gallium/drivers/radeonsi/si_pm4.c | 3 +- src/gallium/drivers/radeonsi/si_state.c | 6 +-- src/gallium/drivers/radeonsi/si_state_draw.c | 10 ++-- src/gallium/drivers/radeonsi/si_state_shaders.c | 12 ++--- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 8 +-- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 11 +++-- 24 files changed, 175 insertions(+), 131 deletions(-) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 4c9971e5128..ecc4307a670 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1357,19 +1357,19 @@ validate: tex = r300_resource(texstate->sampler_views[i]->base.texture); r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, RADEON_USAGE_READ, - tex->domain, RADEON_PRIO_SHADER_TEXTURE_RO); + 
tex->domain, RADEON_PRIO_SAMPLER_TEXTURE); } } /* ...occlusion query buffer... */ if (r300->query_current) r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT, - RADEON_PRIO_MIN); + RADEON_PRIO_QUERY); /* ...vertex buffer for SWTCL path... */ if (r300->vbo_cs) r300->rws->cs_add_reloc(r300->cs, r300->vbo_cs, RADEON_USAGE_READ, RADEON_DOMAIN_GTT, - RADEON_PRIO_MIN); + RADEON_PRIO_VERTEX_BUFFER); /* ...vertex buffers for HWTCL path... */ if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) { struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; @@ -1385,7 +1385,7 @@ validate: r300->rws->cs_add_reloc(r300->cs, r300_resource(buf)->cs_buf, RADEON_USAGE_READ, r300_resource(buf)->domain, - RADEON_PRIO_SHADER_BUFFER_RO); + RADEON_PRIO_SAMPLER_BUFFER); } } /* ...and index buffer for HWTCL path. */ @@ -1393,7 +1393,7 @@ validate: r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf, RADEON_USAGE_READ, r300_resource(index_buffer)->domain, - RADEON_PRIO_MIN); + RADEON_PRIO_INDEX_BUFFER); /* Now do the validation (flush is called inside cs_validate on failure). 
*/ if (!r300->rws->cs_validate(r300->cs)) { diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 33009c16f68..6f2b7ba0db3 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -442,7 +442,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.gfx, (struct r600_resource*)cb->base.texture, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW); + RADEON_PRIO_SHADER_RW_BUFFER); radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7); radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ @@ -566,7 +566,7 @@ void evergreen_emit_cs_shader( radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, code_bo, RADEON_USAGE_READ, - RADEON_PRIO_SHADER_DATA)); + RADEON_PRIO_USER_SHADER)); } static void evergreen_launch_grid( diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index 29bdd9daddb..89abe92cbb4 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -65,9 +65,9 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx, csize = size < EG_DMA_COPY_MAX_SIZE ? 
size : EG_DMA_COPY_MAX_SIZE; /* emit reloc before writing cs so that cs is always in consistent state */ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize); cs->buf[cs->cdw++] = dst_offset & 0xffffffff; cs->buf[cs->cdw++] = src_offset & 0xffffffff; @@ -131,7 +131,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx, /* This must be done after r600_need_cs_space. */ reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_CP_DMA); radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0)); radeon_emit(cs, clear_value); /* DATA [31:0] */ diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 52f4dc81d9f..a5caa0dac2b 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1584,7 +1584,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, tex->cmask_buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_COLOR_META); + RADEON_PRIO_CMASK); } else { cmask_reloc = reloc; } @@ -1767,7 +1767,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control); radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base); reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer, - RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META); + RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE); cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 
0); cs->buf[cs->cdw++] = reloc_idx; } else { @@ -1881,7 +1881,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); + RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER)); } state->dirty_mask = 0; } @@ -1929,7 +1929,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); + RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags); radeon_emit(cs, (buffer_id_base + buffer_index) * 8); @@ -1954,7 +1954,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); + RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); dirty_mask &= ~(1 << buffer_index); } @@ -2018,9 +2018,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx, reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource, RADEON_USAGE_READ, - rview->tex_resource->b.b.nr_samples > 1 ? 
- RADEON_PRIO_SHADER_TEXTURE_MSAA : - RADEON_PRIO_SHADER_TEXTURE_RO); + r600_get_sampler_view_priority(rview->tex_resource)); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); radeon_emit(cs, reloc); @@ -2140,7 +2138,8 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct (shader->buffer->gpu_address + shader->offset) >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA)); + RADEON_USAGE_READ, + RADEON_PRIO_INTERNAL_SHADER)); } static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) @@ -2199,7 +2198,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW)); + RADEON_PRIO_RINGS_STREAMOUT)); radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, state->esgs_ring.buffer_size >> 8); @@ -2209,7 +2208,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW)); + RADEON_PRIO_RINGS_STREAMOUT)); radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, state->gsvs_ring.buffer_size >> 8); } else { @@ -3330,9 +3329,9 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx, size = (cheight * pitch) / 4; /* emit reloc before writing cs so that cs is always in consistent state */ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE); radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource, - RADEON_USAGE_WRITE, RADEON_PRIO_MIN); + RADEON_USAGE_WRITE, 
RADEON_PRIO_SDMA_TEXTURE); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size); cs->buf[cs->cdw++] = base >> 8; cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) | diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index cf715976ab2..6f11366e606 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -419,9 +419,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, /* This must be done after r600_need_cs_space. */ src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_CP_DMA); dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst, - RADEON_USAGE_WRITE, RADEON_PRIO_MIN); + RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0)); radeon_emit(cs, src_offset); /* SRC_ADDR_LO [31:0] */ @@ -472,9 +472,9 @@ void r600_dma_copy_buffer(struct r600_context *rctx, csize = size < R600_DMA_COPY_MAX_SIZE_DW ? 
size : R600_DMA_COPY_MAX_SIZE_DW; /* emit reloc before writing cs so that cs is always in consistent state */ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize); cs->buf[cs->cdw++] = dst_offset & 0xfffffffc; cs->buf[cs->cdw++] = src_offset & 0xfffffffc; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 7084c5f359b..4b171894f5c 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1605,7 +1605,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface); radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base); reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer, - RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META); + RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE); cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = reloc_idx; } else { @@ -1720,7 +1720,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); + RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER)); } } @@ -1753,7 +1753,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); + RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0)); radeon_emit(cs, 
(buffer_id_base + buffer_index) * 7); @@ -1769,7 +1769,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); + RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); dirty_mask &= ~(1 << buffer_index); } @@ -1821,9 +1821,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx, reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource, RADEON_USAGE_READ, - rview->tex_resource->b.b.nr_samples > 1 ? - RADEON_PRIO_SHADER_TEXTURE_MSAA : - RADEON_PRIO_SHADER_TEXTURE_RO); + r600_get_sampler_view_priority(rview->tex_resource)); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, reloc); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); @@ -1945,7 +1943,8 @@ static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600 radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA)); + RADEON_USAGE_READ, + RADEON_PRIO_INTERNAL_SHADER)); } static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) @@ -1999,7 +1998,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW)); + RADEON_PRIO_RINGS_STREAMOUT)); radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, state->esgs_ring.buffer_size >> 8); @@ -2008,7 +2007,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE, - 
RADEON_PRIO_SHADER_RESOURCE_RW)); + RADEON_PRIO_RINGS_STREAMOUT)); radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, state->gsvs_ring.buffer_size >> 8); } else { @@ -2914,9 +2913,9 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, size = (cheight * pitch) / 4; /* emit reloc before writing cs so that cs is always in consistent state */ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_TEXTURE); radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_TEXTURE); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, size); cs->buf[cs->cdw++] = base >> 8; cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) | diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index efce852eafa..a16f1c25dcb 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1683,7 +1683,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing); cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)info.indirect, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, + RADEON_PRIO_DRAW_INDIRECT); } if (info.indexed) { @@ -1712,7 +1713,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing); cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)ib.buffer, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, + RADEON_PRIO_INDEX_BUFFER); } else { uint32_t max_size = (ib.buffer->width0 - ib.offset) / ib.index_size; @@ -1724,7 +1726,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->buf[cs->cdw++] 
= PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing); cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)ib.buffer, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, + RADEON_PRIO_INDEX_BUFFER); cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, rctx->b.predicate_drawing); cs->buf[cs->cdw++] = max_size; @@ -1751,7 +1754,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, t->buf_filled_size, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_SO_FILLED_SIZE); } if (likely(!info.indirect)) { @@ -1941,7 +1944,7 @@ void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a) r600_emit_command_buffer(cs, &shader->command_buffer); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->bo, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA)); + RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER)); } unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, @@ -2669,7 +2672,7 @@ void r600_trace_emit(struct r600_context *rctx) va = rscreen->b.trace_bo->gpu_address; reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo, - RADEON_USAGE_READWRITE, RADEON_PRIO_MIN); + RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE); radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0)); radeon_emit(cs, va & 0xFFFFFFFFUL); radeon_emit(cs, (va >> 32UL) & 0xFFUL); diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 2df93e54559..0f9b91af315 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -623,6 +623,18 @@ static inline unsigned r600_wavefront_size(enum radeon_family family) } } +static inline enum radeon_bo_priority +r600_get_sampler_view_priority(struct r600_resource *res) +{ + 
if (res->b.b.target == PIPE_BUFFER) + return RADEON_PRIO_SAMPLER_BUFFER; + + if (res->b.b.nr_samples > 1) + return RADEON_PRIO_SAMPLER_TEXTURE_MSAA; + + return RADEON_PRIO_SAMPLER_TEXTURE; +} + #define COMPUTE_DBG(rscreen, fmt, args...) \ do { \ if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \ diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index deeae0a6a65..9a5402583f4 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -226,7 +226,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q assert(0); } r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_QUERY); if (r600_is_timer_query(query->type)) ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw; @@ -288,7 +288,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que assert(0); } r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_QUERY); query->buffer.results_end += query->result_size; @@ -344,7 +344,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct radeon_emit(cs, va + results_base); radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF)); r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_QUERY); results_base += query->result_size; /* set CONTINUE bit for all packets except the first */ @@ -990,7 +990,8 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx) radeon_emit(cs, buffer->gpu_address); radeon_emit(cs, buffer->gpu_address >> 32); - r600_emit_reloc(ctx, &ctx->rings.gfx, buffer, RADEON_USAGE_WRITE, RADEON_PRIO_MIN); + r600_emit_reloc(ctx, &ctx->rings.gfx, buffer, + RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); /* analyze results */ results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ); 
diff --git a/src/gallium/drivers/radeon/r600_streamout.c b/src/gallium/drivers/radeon/r600_streamout.c index 5198f1e041d..33403b572af 100644 --- a/src/gallium/drivers/radeon/r600_streamout.c +++ b/src/gallium/drivers/radeon/r600_streamout.c @@ -217,7 +217,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r radeon_emit(cs, va >> 8); /* BUFFER_BASE */ r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer), - RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW); + RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT); /* R7xx requires this packet after updating BUFFER_BASE. * Without this, R7xx locks up. */ @@ -227,7 +227,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r radeon_emit(cs, va >> 8); r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer), - RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW); + RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT); } } @@ -245,7 +245,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r radeon_emit(cs, va >> 32); /* src address hi */ r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE); } else { /* Start from the beginning. */ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); @@ -289,7 +289,7 @@ void r600_emit_streamout_end(struct r600_common_context *rctx) radeon_emit(cs, 0); /* unused */ r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size, - RADEON_USAGE_WRITE, RADEON_PRIO_MIN); + RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE); /* Zero the buffer size. 
The counters (primitives generated, * primitives emitted) may be enabled even if there is not diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 81f3f45db9f..4c59885eecf 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -111,7 +111,7 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, int reloc_idx; reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain, - RADEON_PRIO_MIN); + RADEON_PRIO_UVD); if (!dec->use_legacy) { uint64_t addr; addr = dec->ws->buffer_get_virtual_address(cs_buf); diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index 7eab974a3df..0e01e91d02b 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -516,7 +516,7 @@ void rvce_add_buffer(struct rvce_encoder *enc, struct radeon_winsys_cs_handle *b { int reloc_idx; - reloc_idx = enc->ws->cs_add_reloc(enc->cs, buf, usage, domain, RADEON_PRIO_MIN); + reloc_idx = enc->ws->cs_add_reloc(enc->cs, buf, usage, domain, RADEON_PRIO_VCE); if (enc->use_vm) { uint64_t addr; addr = enc->ws->buffer_get_virtual_address(buf); diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 00accd5b3e6..8b17281efe7 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -178,20 +178,59 @@ enum radeon_value_id { RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */ }; +/* Each group of four has the same priority. 
*/ enum radeon_bo_priority { - RADEON_PRIO_MIN, - RADEON_PRIO_SHADER_DATA, /* shader code, resource descriptors */ - RADEON_PRIO_SHADER_BUFFER_RO, /* read-only */ - RADEON_PRIO_SHADER_TEXTURE_RO, /* read-only */ - RADEON_PRIO_SHADER_RESOURCE_RW, /* buffers, textures, streamout, GS rings, RATs; read/write */ - RADEON_PRIO_COLOR_BUFFER, - RADEON_PRIO_DEPTH_BUFFER, - RADEON_PRIO_SHADER_TEXTURE_MSAA, - RADEON_PRIO_COLOR_BUFFER_MSAA, - RADEON_PRIO_DEPTH_BUFFER_MSAA, - RADEON_PRIO_COLOR_META, - RADEON_PRIO_DEPTH_META, - RADEON_PRIO_MAX /* must be <= 15 */ + RADEON_PRIO_FENCE = 0, + RADEON_PRIO_TRACE, + RADEON_PRIO_SO_FILLED_SIZE, + RADEON_PRIO_QUERY, + + RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */ + RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */ + RADEON_PRIO_DRAW_INDIRECT, + RADEON_PRIO_INDEX_BUFFER, + + RADEON_PRIO_CP_DMA = 8, + + RADEON_PRIO_VCE = 12, + RADEON_PRIO_UVD, + RADEON_PRIO_SDMA_BUFFER, + RADEON_PRIO_SDMA_TEXTURE, + + RADEON_PRIO_USER_SHADER = 16, + RADEON_PRIO_INTERNAL_SHADER, /* fetch shader, etc. 
*/ + + /* gap: 20 */ + + RADEON_PRIO_CONST_BUFFER = 24, + RADEON_PRIO_DESCRIPTORS, + RADEON_PRIO_BORDER_COLORS, + + RADEON_PRIO_SAMPLER_BUFFER = 28, + RADEON_PRIO_VERTEX_BUFFER, + + RADEON_PRIO_SHADER_RW_BUFFER = 32, + RADEON_PRIO_RINGS_STREAMOUT, + RADEON_PRIO_SCRATCH_BUFFER, + RADEON_PRIO_COMPUTE_GLOBAL, + + RADEON_PRIO_SAMPLER_TEXTURE = 36, + RADEON_PRIO_SHADER_RW_IMAGE, + + RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 40, + + RADEON_PRIO_COLOR_BUFFER = 44, + + RADEON_PRIO_DEPTH_BUFFER = 48, + + RADEON_PRIO_COLOR_BUFFER_MSAA = 52, + + RADEON_PRIO_DEPTH_BUFFER_MSAA = 56, + + RADEON_PRIO_CMASK = 60, + RADEON_PRIO_DCC, + RADEON_PRIO_HTILE, + /* 63 is the maximum value */ }; struct winsys_handle; diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index 8b0ce9f1bb8..691d379bccd 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -62,9 +62,9 @@ static void cik_sdma_do_copy_buffer(struct si_context *ctx, r600_need_dma_space(&ctx->b, ncopy * 7); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); for (i = 0; i < ncopy; i++) { csize = size < CIK_SDMA_COPY_MAX_SIZE ? 
size : CIK_SDMA_COPY_MAX_SIZE; @@ -172,9 +172,9 @@ static void cik_sdma_copy_tile(struct si_context *ctx, r600_need_dma_space(&ctx->b, ncopy * 12); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource, - RADEON_USAGE_WRITE, RADEON_PRIO_MIN); + RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE); copy_height = size * 4 / pitch; for (i = 0; i < ncopy; i++) { diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index e1849bad933..c6605346771 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -297,7 +297,7 @@ static void si_launch_grid( radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, shader->scratch_bo, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW); + RADEON_PRIO_SCRATCH_BUFFER); scratch_buffer_va = shader->scratch_bo->gpu_address; } @@ -311,7 +311,7 @@ static void si_launch_grid( kernel_args_va += kernel_args_offset; radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, input_buffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER); si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va); si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0)); @@ -340,7 +340,7 @@ static void si_launch_grid( } radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW); + RADEON_PRIO_COMPUTE_GLOBAL); } /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID @@ -362,7 +362,7 @@ static void si_launch_grid( shader_va += pc; #endif radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, shader->bo, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); si_pm4_set_reg(pm4, 
R_00B830_COMPUTE_PGM_LO, shader_va >> 8); si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40); diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 32ab6a9dcbf..d4bd7b28cf3 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -160,7 +160,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, /* This must be done after need_cs_space. */ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_CP_DMA); /* Flush the caches for the first copy only. * Also wait for the previous CP DMA operations. */ @@ -240,9 +240,9 @@ void si_copy_buffer(struct si_context *sctx, /* This must be done after r600_need_cs_space. */ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_CP_DMA); radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst, - RADEON_USAGE_WRITE, RADEON_PRIO_MIN); + RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index b07ab3b94ac..74ec7cccba8 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -118,7 +118,7 @@ static bool si_upload_descriptors(struct si_context *sctx, util_memcpy_cpu_to_le32(ptr, desc->list, list_size); radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, desc->buffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); desc->list_dirty = false; desc->pointer_dirty = true; @@ -138,17 +138,6 @@ static void si_release_sampler_views(struct si_sampler_views *views) si_release_descriptors(&views->desc); } -static enum 
radeon_bo_priority si_get_resource_ro_priority(struct r600_resource *res) -{ - if (res->b.b.target == PIPE_BUFFER) - return RADEON_PRIO_SHADER_BUFFER_RO; - - if (res->b.b.nr_samples > 1) - return RADEON_PRIO_SHADER_TEXTURE_MSAA; - - return RADEON_PRIO_SHADER_TEXTURE_RO; -} - static void si_sampler_views_begin_new_cs(struct si_context *sctx, struct si_sampler_views *views) { @@ -165,13 +154,13 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx, radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, rview->resource, RADEON_USAGE_READ, - si_get_resource_ro_priority(rview->resource)); + r600_get_sampler_view_priority(rview->resource)); } if (!views->desc.buffer) return; radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer, - RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA); + RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS); } static void si_set_sampler_view(struct si_context *sctx, unsigned shader, @@ -190,7 +179,7 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader, if (rview->resource) radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, rview->resource, RADEON_USAGE_READ, - si_get_resource_ro_priority(rview->resource)); + r600_get_sampler_view_priority(rview->resource)); pipe_sampler_view_reference(&views->views[slot], view); memcpy(views->desc.list + slot*8, view_desc, 8*4); @@ -270,7 +259,7 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx, if (!states->desc.buffer) return; radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer, - RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA); + RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS); } static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, @@ -348,7 +337,7 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx, return; radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, buffers->desc.buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_DATA); + 
RADEON_PRIO_DESCRIPTORS); } /* VERTEX BUFFERS */ @@ -369,14 +358,14 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx) radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)sctx->vertex_buffer[vb].buffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO); + RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); } if (!desc->buffer) return; radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, desc->buffer, RADEON_USAGE_READ, - RADEON_PRIO_SHADER_DATA); + RADEON_PRIO_DESCRIPTORS); } static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) @@ -403,7 +392,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, desc->buffer, RADEON_USAGE_READ, - RADEON_PRIO_SHADER_DATA); + RADEON_PRIO_DESCRIPTORS); assert(count <= SI_NUM_VERTEX_BUFFERS); @@ -447,7 +436,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) if (!bound[ve->vertex_buffer_index]) { radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)vb->buffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO); + RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); bound[ve->vertex_buffer_index] = true; } } @@ -870,7 +859,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, rbuffer, RADEON_USAGE_READ, - RADEON_PRIO_SHADER_BUFFER_RO); + RADEON_PRIO_SAMPLER_BUFFER); } } } @@ -1017,10 +1006,10 @@ void si_init_all_descriptors(struct si_context *sctx) for (i = 0; i < SI_NUM_SHADERS; i++) { si_init_buffer_resources(&sctx->const_buffers[i], SI_NUM_CONST_BUFFERS, SI_SGPR_CONST, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO); + RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER); si_init_buffer_resources(&sctx->rw_buffers[i], SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS, - RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW); + RADEON_USAGE_READWRITE, 
RADEON_PRIO_RINGS_STREAMOUT); si_init_descriptors(&sctx->samplers[i].views.desc, SI_SGPR_RESOURCE, 8, SI_NUM_SAMPLER_VIEWS); diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c index 309ae04424a..3d980fb67b8 100644 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ b/src/gallium/drivers/radeonsi/si_dma.c @@ -79,9 +79,9 @@ static void si_dma_copy_buffer(struct si_context *ctx, r600_need_dma_space(&ctx->b, ncopy * 5); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); for (i = 0; i < ncopy; i++) { csize = size < max_csize ? size : max_csize; @@ -178,9 +178,9 @@ static void si_dma_copy_tile(struct si_context *ctx, r600_need_dma_space(&ctx->b, ncopy * 9); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource, - RADEON_USAGE_WRITE, RADEON_PRIO_MIN); + RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE); for (i = 0; i < ncopy; i++) { cheight = copy_height; diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index b1834afa796..f16933c5f98 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -140,7 +140,8 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state) struct r600_resource *ib = state->indirect_buffer; radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, ib, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, + RADEON_PRIO_IB2); radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)); radeon_emit(cs, ib->gpu_address); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index d74f6e896c4..5d4e579b392 100644 --- 
a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2238,7 +2238,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, tex->cmask_buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_COLOR_META); + RADEON_PRIO_CMASK); } radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, @@ -2285,7 +2285,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom if (zb->db_htile_data_base) { radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, rtex->htile_buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_DEPTH_META); + RADEON_PRIO_HTILE); } radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); @@ -3391,7 +3391,7 @@ static void si_init_config(struct si_context *sctx) if (sctx->b.chip_class >= CIK) si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40); si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ, - RADEON_PRIO_SHADER_DATA); + RADEON_PRIO_BORDER_COLORS); si_pm4_upload_indirect_buffer(sctx, pm4); sctx->init_config = pm4; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 6d8e0e509bf..fb65eb3ce2d 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -353,7 +353,7 @@ static void si_emit_scratch_reloc(struct si_context *sctx) if (sctx->scratch_buffer) { radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, sctx->scratch_buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW); + RADEON_PRIO_SCRATCH_BUFFER); } sctx->emit_scratch_reloc = false; @@ -467,7 +467,7 @@ static void si_emit_draw_packets(struct si_context *sctx, radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, t->buf_filled_size, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_SO_FILLED_SIZE); } /* draw packet */ @@ 
-521,7 +521,7 @@ static void si_emit_draw_packets(struct si_context *sctx, radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource *)info->indirect, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); } if (info->indexed) { @@ -531,7 +531,7 @@ static void si_emit_draw_packets(struct si_context *sctx, radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource *)ib->buffer, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER); if (info->indirect) { uint64_t indirect_va = r600_resource(info->indirect)->gpu_address; @@ -883,7 +883,7 @@ void si_trace_emit(struct si_context *sctx) sctx->trace_id++; radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf, - RADEON_USAGE_READWRITE, RADEON_PRIO_MIN); + RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE); radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | S_370_WR_CONFIRM(1) | diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index b5e14ead160..31c0ab95464 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -103,7 +103,7 @@ static void si_shader_ls(struct si_shader *shader) return; va = shader->bo->gpu_address; - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); /* We need at least 2 components for LS. * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). 
*/ @@ -138,7 +138,7 @@ static void si_shader_hs(struct si_shader *shader) return; va = shader->bo->gpu_address; - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); num_user_sgprs = SI_TCS_NUM_USER_SGPR; num_sgprs = shader->num_sgprs; @@ -173,7 +173,7 @@ static void si_shader_es(struct si_shader *shader) return; va = shader->bo->gpu_address; - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); if (shader->selector->type == PIPE_SHADER_VERTEX) { vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0; @@ -279,7 +279,7 @@ static void si_shader_gs(struct si_shader *shader) S_028B90_ENABLE(gs_num_invocations > 0)); va = shader->bo->gpu_address; - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8); si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40); @@ -327,7 +327,7 @@ static void si_shader_vs(struct si_shader *shader) si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0); va = shader->bo->gpu_address; - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); if (shader->is_gs_copy_shader) { vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. 
*/ @@ -458,7 +458,7 @@ static void si_shader_ps(struct si_shader *shader) si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask); va = shader->bo->gpu_address; - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 84fc40b923d..9a89ba85a29 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -409,7 +409,7 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs, unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1); int i = -1; - priority = MIN2(priority, 15); + assert(priority < 64); *added_domains = 0; i = amdgpu_get_reloc(cs, bo); @@ -419,7 +419,7 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs, reloc->usage |= usage; *added_domains = domains & ~reloc->domains; reloc->domains |= domains; - cs->flags[i] = MAX2(cs->flags[i], priority); + cs->flags[i] = MAX2(cs->flags[i], priority / 4); return i; } @@ -441,7 +441,7 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs, cs->buffers[cs->num_buffers].bo = NULL; amdgpu_winsys_bo_reference(&cs->buffers[cs->num_buffers].bo, bo); cs->handles[cs->num_buffers] = bo->bo; - cs->flags[cs->num_buffers] = priority; + cs->flags[cs->num_buffers] = priority / 4; p_atomic_inc(&bo->num_cs_references); reloc = &cs->buffers[cs->num_buffers]; reloc->bo = bo; @@ -622,7 +622,7 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs, } amdgpu_cs_add_reloc(rcs, (void*)cs->big_ib_winsys_buffer, - RADEON_USAGE_READ, 0, RADEON_PRIO_MIN); + RADEON_USAGE_READ, 0, RADEON_PRIO_IB1); /* If the CS is not empty or overflowed.... 
*/ if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) { diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 2c4f990944c..b277efecf61 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -263,14 +263,14 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs, enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0; int i = -1; - priority = MIN2(priority, 15); + assert(priority < 64); *added_domains = 0; i = radeon_get_reloc(csc, bo); if (i >= 0) { reloc = &csc->relocs[i]; - update_reloc(reloc, rd, wd, priority, added_domains); + update_reloc(reloc, rd, wd, priority / 4, added_domains); /* For async DMA, every add_reloc call must add a buffer to the list * no matter how many duplicates there are. This is due to the fact @@ -309,7 +309,7 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs, reloc->handle = bo->handle; reloc->read_domains = rd; reloc->write_domain = wd; - reloc->flags = priority; + reloc->flags = priority / 4; csc->reloc_indices_hashlist[hash] = csc->crelocs; @@ -328,7 +328,8 @@ static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)buf; enum radeon_bo_domain added_domains; - unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains); + unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, + &added_domains); if (added_domains & RADEON_DOMAIN_GTT) cs->csc->used_gart += bo->base.size; @@ -633,7 +634,7 @@ radeon_cs_create_fence(struct radeon_winsys_cs *rcs) /* Add the fence as a dummy relocation. 
*/ cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence), RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT, - RADEON_PRIO_MIN); + RADEON_PRIO_FENCE); return (struct pipe_fence_handle*)fence; } -- cgit v1.2.3 From 93641f43416b8b8be8944e9d1473369bfda7f302 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 00:10:00 +0200 Subject: gallium/radeon: stop using "reloc" in a few places MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer --- src/gallium/drivers/r300/r300_cs.h | 4 +-- src/gallium/drivers/r300/r300_emit.c | 18 +++++----- src/gallium/drivers/radeon/r600_cs.h | 2 +- src/gallium/drivers/radeon/radeon_uvd.c | 2 +- src/gallium/drivers/radeon/radeon_vce.c | 2 +- src/gallium/drivers/radeon/radeon_winsys.h | 23 ++++++------ src/gallium/drivers/radeonsi/si_descriptors.c | 4 +-- src/gallium/drivers/radeonsi/si_hw_context.c | 2 +- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 50 +++++++++++++-------------- src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 8 ++--- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 40 ++++++++++----------- src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 8 ++--- 12 files changed, 81 insertions(+), 82 deletions(-) diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index fc150542d4b..a2d042ca48e 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -103,14 +103,14 @@ /** - * Writing relocations. + * Writing buffers. 
*/ #define OUT_CS_RELOC(r) do { \ assert((r)); \ assert((r)->cs_buf); \ OUT_CS(0xc0001000); /* PKT3_NOP */ \ - OUT_CS(cs_winsys->cs_get_reloc(cs_copy, (r)->cs_buf) * 4); \ + OUT_CS(cs_winsys->cs_lookup_buffer(cs_copy, (r)->cs_buf) * 4); \ } while (0) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index ecc4307a670..7610c3ddf5b 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1049,7 +1049,7 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed) assert(r300->vbo_cs); OUT_CS(0xc0001000); /* PKT3_NOP */ - OUT_CS(r300->rws->cs_get_reloc(r300->cs, r300->vbo_cs) * 4); + OUT_CS(r300->rws->cs_lookup_buffer(r300->cs, r300->vbo_cs) * 4); END_CS; } @@ -1320,7 +1320,7 @@ validate: continue; tex = r300_resource(fb->cbufs[i]->texture); assert(tex && tex->buf && "cbuf is marked, but NULL!"); - r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, + r300->rws->cs_add_buffer(r300->cs, tex->cs_buf, RADEON_USAGE_READWRITE, r300_surface(fb->cbufs[i])->domain, tex->b.b.nr_samples > 1 ? @@ -1331,7 +1331,7 @@ validate: if (fb->zsbuf) { tex = r300_resource(fb->zsbuf->texture); assert(tex && tex->buf && "zsbuf is marked, but NULL!"); - r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, + r300->rws->cs_add_buffer(r300->cs, tex->cs_buf, RADEON_USAGE_READWRITE, r300_surface(fb->zsbuf)->domain, tex->b.b.nr_samples > 1 ? @@ -1342,7 +1342,7 @@ validate: /* The AA resolve buffer. 
*/ if (r300->aa_state.dirty) { if (aa->dest) { - r300->rws->cs_add_reloc(r300->cs, aa->dest->cs_buf, + r300->rws->cs_add_buffer(r300->cs, aa->dest->cs_buf, RADEON_USAGE_WRITE, aa->dest->domain, RADEON_PRIO_COLOR_BUFFER); @@ -1356,18 +1356,18 @@ validate: } tex = r300_resource(texstate->sampler_views[i]->base.texture); - r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, RADEON_USAGE_READ, + r300->rws->cs_add_buffer(r300->cs, tex->cs_buf, RADEON_USAGE_READ, tex->domain, RADEON_PRIO_SAMPLER_TEXTURE); } } /* ...occlusion query buffer... */ if (r300->query_current) - r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf, + r300->rws->cs_add_buffer(r300->cs, r300->query_current->cs_buf, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT, RADEON_PRIO_QUERY); /* ...vertex buffer for SWTCL path... */ if (r300->vbo_cs) - r300->rws->cs_add_reloc(r300->cs, r300->vbo_cs, + r300->rws->cs_add_buffer(r300->cs, r300->vbo_cs, RADEON_USAGE_READ, RADEON_DOMAIN_GTT, RADEON_PRIO_VERTEX_BUFFER); /* ...vertex buffers for HWTCL path... */ @@ -1382,7 +1382,7 @@ validate: if (!buf) continue; - r300->rws->cs_add_reloc(r300->cs, r300_resource(buf)->cs_buf, + r300->rws->cs_add_buffer(r300->cs, r300_resource(buf)->cs_buf, RADEON_USAGE_READ, r300_resource(buf)->domain, RADEON_PRIO_SAMPLER_BUFFER); @@ -1390,7 +1390,7 @@ validate: } /* ...and index buffer for HWTCL path. 
*/ if (index_buffer) - r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf, + r300->rws->cs_add_buffer(r300->cs, r300_resource(index_buffer)->cs_buf, RADEON_USAGE_READ, r300_resource(index_buffer)->domain, RADEON_PRIO_INDEX_BUFFER); diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h index fa40dc42a31..b5a1dafb273 100644 --- a/src/gallium/drivers/radeon/r600_cs.h +++ b/src/gallium/drivers/radeon/r600_cs.h @@ -65,7 +65,7 @@ static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rct rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL); } } - return rctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage, + return rctx->ws->cs_add_buffer(ring->cs, rbo->cs_buf, usage, rbo->domains, priority) * 4; } diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 4c59885eecf..c3ac7e7f2ef 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -110,7 +110,7 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, { int reloc_idx; - reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain, + reloc_idx = dec->ws->cs_add_buffer(dec->cs, cs_buf, usage, domain, RADEON_PRIO_UVD); if (!dec->use_legacy) { uint64_t addr; diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index 0e01e91d02b..0dac6fbbdce 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -516,7 +516,7 @@ void rvce_add_buffer(struct rvce_encoder *enc, struct radeon_winsys_cs_handle *b { int reloc_idx; - reloc_idx = enc->ws->cs_add_reloc(enc->cs, buf, usage, domain, RADEON_PRIO_VCE); + reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage, domain, RADEON_PRIO_VCE); if (enc->use_vm) { uint64_t addr; addr = enc->ws->buffer_get_virtual_address(buf); diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 
8b17281efe7..3049852c5a5 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -595,18 +595,17 @@ struct radeon_winsys { void (*cs_destroy)(struct radeon_winsys_cs *cs); /** - * Add a new buffer relocation. Every relocation must first be added - * before it can be written. + * Add a buffer. Each buffer used by a CS must be added using this function. * - * \param cs A command stream to add buffer for validation against. - * \param buf A winsys buffer to validate. + * \param cs Command stream + * \param buf Buffer * \param usage Whether the buffer is used for read and/or write. * \param domain Bitmask of the RADEON_DOMAIN_* flags. * \param priority A higher number means a greater chance of being * placed in the requested domain. 15 is the maximum. - * \return Relocation index. + * \return Buffer index. */ - unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs, + unsigned (*cs_add_buffer)(struct radeon_winsys_cs *cs, struct radeon_winsys_cs_handle *buf, enum radeon_bo_usage usage, enum radeon_bo_domain domain, @@ -619,21 +618,21 @@ struct radeon_winsys { * \param buf Buffer * \return The buffer index, or -1 if the buffer has not been added. */ - int (*cs_get_reloc)(struct radeon_winsys_cs *cs, - struct radeon_winsys_cs_handle *buf); + int (*cs_lookup_buffer)(struct radeon_winsys_cs *cs, + struct radeon_winsys_cs_handle *buf); /** - * Return TRUE if there is enough memory in VRAM and GTT for the relocs - * added so far. If the validation fails, all the relocations which have + * Return TRUE if there is enough memory in VRAM and GTT for the buffers + * added so far. If the validation fails, all buffers which have * been added since the last call of cs_validate will be removed and - * the CS will be flushed (provided there are still any relocations). + * the CS will be flushed (provided there are still any buffers). * * \param cs A command stream to validate. 
*/ boolean (*cs_validate)(struct radeon_winsys_cs *cs); /** - * Return TRUE if there is enough memory in VRAM and GTT for the relocs + * Return TRUE if there is enough memory in VRAM and GTT for the buffers * added so far. * * \param cs A command stream to validate. diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 74ec7cccba8..19dd14f9b6f 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -143,7 +143,7 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx, { uint64_t mask = views->desc.enabled_mask; - /* Add relocations to the CS. */ + /* Add buffers to the CS. */ while (mask) { int i = u_bit_scan64(&mask); struct si_sampler_view *rview = @@ -324,7 +324,7 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx, { uint64_t mask = buffers->desc.enabled_mask; - /* Add relocations to the CS. */ + /* Add buffers to the CS. */ while (mask) { int i = u_bit_scan64(&mask); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index c789292e742..de95d12f000 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -32,7 +32,7 @@ void si_need_cs_space(struct si_context *ctx) struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; /* There are two memory usage counters in the winsys for all buffers - * that have been added (cs_add_reloc) and two counters in the pipe + * that have been added (cs_add_buffer) and two counters in the pipe * driver for those that haven't been added yet. 
*/ if (unlikely(!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs, diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 9a89ba85a29..19a20048470 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -370,7 +370,7 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx, #define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value) -int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo) +int amdgpu_lookup_buffer(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo) { unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1); int i = cs->buffer_indices_hashlist[hash]; @@ -379,15 +379,15 @@ int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo) if (i == -1 || cs->buffers[i].bo == bo) return i; - /* Hash collision, look for the BO in the list of relocs linearly. */ + /* Hash collision, look for the BO in the list of buffers linearly. */ for (i = cs->num_buffers - 1; i >= 0; i--) { if (cs->buffers[i].bo == bo) { - /* Put this reloc in the hash list. + /* Put this buffer in the hash list. * This will prevent additional hash collisions if there are - * several consecutive get_reloc calls for the same buffer. + * several consecutive lookup_buffer calls for the same buffer. * * Example: Assuming buffers A,B,C collide in the hash list, - * the following sequence of relocs: + * the following sequence of buffers: * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC * will collide here: ^ and here: ^, * meaning that we should get very few collisions in the end. 
*/ @@ -398,32 +398,32 @@ int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo) return -1; } -static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs, +static unsigned amdgpu_add_buffer(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo, enum radeon_bo_usage usage, enum radeon_bo_domain domains, unsigned priority, enum radeon_bo_domain *added_domains) { - struct amdgpu_cs_buffer *reloc; + struct amdgpu_cs_buffer *buffer; unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1); int i = -1; assert(priority < 64); *added_domains = 0; - i = amdgpu_get_reloc(cs, bo); + i = amdgpu_lookup_buffer(cs, bo); if (i >= 0) { - reloc = &cs->buffers[i]; - reloc->usage |= usage; - *added_domains = domains & ~reloc->domains; - reloc->domains |= domains; + buffer = &cs->buffers[i]; + buffer->usage |= usage; + *added_domains = domains & ~buffer->domains; + buffer->domains |= domains; cs->flags[i] = MAX2(cs->flags[i], priority / 4); return i; } - /* New relocation, check if the backing array is large enough. */ + /* New buffer, check if the backing array is large enough. */ if (cs->num_buffers >= cs->max_num_buffers) { uint32_t size; cs->max_num_buffers += 10; @@ -437,16 +437,16 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs, cs->flags = realloc(cs->flags, cs->max_num_buffers); } - /* Initialize the new relocation. */ + /* Initialize the new buffer. 
*/ cs->buffers[cs->num_buffers].bo = NULL; amdgpu_winsys_bo_reference(&cs->buffers[cs->num_buffers].bo, bo); cs->handles[cs->num_buffers] = bo->bo; cs->flags[cs->num_buffers] = priority / 4; p_atomic_inc(&bo->num_cs_references); - reloc = &cs->buffers[cs->num_buffers]; - reloc->bo = bo; - reloc->usage = usage; - reloc->domains = domains; + buffer = &cs->buffers[cs->num_buffers]; + buffer->bo = bo; + buffer->usage = usage; + buffer->domains = domains; cs->buffer_indices_hashlist[hash] = cs->num_buffers; @@ -454,7 +454,7 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs, return cs->num_buffers++; } -static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs, +static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs, struct radeon_winsys_cs_handle *buf, enum radeon_bo_usage usage, enum radeon_bo_domain domains, @@ -466,7 +466,7 @@ static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs, struct amdgpu_cs *cs = amdgpu_cs(rcs); struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf; enum radeon_bo_domain added_domains; - unsigned index = amdgpu_add_reloc(cs, bo, usage, bo->initial_domain, + unsigned index = amdgpu_add_buffer(cs, bo, usage, bo->initial_domain, priority, &added_domains); if (added_domains & RADEON_DOMAIN_GTT) @@ -477,12 +477,12 @@ static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs, return index; } -static int amdgpu_cs_get_reloc(struct radeon_winsys_cs *rcs, +static int amdgpu_cs_lookup_buffer(struct radeon_winsys_cs *rcs, struct radeon_winsys_cs_handle *buf) { struct amdgpu_cs *cs = amdgpu_cs(rcs); - return amdgpu_get_reloc(cs, (struct amdgpu_winsys_bo*)buf); + return amdgpu_lookup_buffer(cs, (struct amdgpu_winsys_bo*)buf); } static boolean amdgpu_cs_validate(struct radeon_winsys_cs *rcs) @@ -621,7 +621,7 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs, fprintf(stderr, "amdgpu: command stream overflowed\n"); } - amdgpu_cs_add_reloc(rcs, (void*)cs->big_ib_winsys_buffer, + 
amdgpu_cs_add_buffer(rcs, (void*)cs->big_ib_winsys_buffer, RADEON_USAGE_READ, 0, RADEON_PRIO_IB1); /* If the CS is not empty or overflowed.... */ @@ -682,8 +682,8 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws) ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status; ws->base.cs_create = amdgpu_cs_create; ws->base.cs_destroy = amdgpu_cs_destroy; - ws->base.cs_add_reloc = amdgpu_cs_add_reloc; - ws->base.cs_get_reloc = amdgpu_cs_get_reloc; + ws->base.cs_add_buffer = amdgpu_cs_add_buffer; + ws->base.cs_lookup_buffer = amdgpu_cs_lookup_buffer; ws->base.cs_validate = amdgpu_cs_validate; ws->base.cs_memory_below_limit = amdgpu_cs_memory_below_limit; ws->base.cs_flush = amdgpu_cs_flush; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index 12c6b624b03..1955fe2ed44 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -68,7 +68,7 @@ struct amdgpu_cs { struct amdgpu_cs_request request; struct amdgpu_cs_ib_info ib; - /* Relocs. */ + /* Buffers. 
*/ unsigned max_num_buffers; unsigned num_buffers; amdgpu_bo_handle *handles; @@ -115,7 +115,7 @@ static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst, *rdst = rsrc; } -int amdgpu_get_reloc(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo); +int amdgpu_lookup_buffer(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo); static inline struct amdgpu_cs * amdgpu_cs(struct radeon_winsys_cs *base) @@ -129,7 +129,7 @@ amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs, { int num_refs = bo->num_cs_references; return num_refs == bo->rws->num_cs || - (num_refs && amdgpu_get_reloc(cs, bo) != -1); + (num_refs && amdgpu_lookup_buffer(cs, bo) != -1); } static inline boolean @@ -142,7 +142,7 @@ amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs, if (!bo->num_cs_references) return FALSE; - index = amdgpu_get_reloc(cs, bo); + index = amdgpu_lookup_buffer(cs, bo); if (index == -1) return FALSE; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index b277efecf61..6e707b699c4 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -37,13 +37,13 @@ /* This file replaces libdrm's radeon_cs_gem with our own implemention. It's optimized specifically for Radeon DRM. - Reloc writes and space checking are faster and simpler than their + Adding buffers and space checking are faster and simpler than their counterparts in libdrm (the time complexity of all the functions is O(1) in nearly all scenarios, thanks to hashing). It works like this: - cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and + cs_add_buffer(cs, buf, read_domain, write_domain) adds a new relocation and also adds the size of 'buf' to the used_gart and used_vram winsys variables based on the domains, which are simply or'd for the accounting purposes. 
The adding is skipped if the reloc is already present in the list, but it @@ -58,8 +58,8 @@ (done in the pipe driver) cs_write_reloc(cs, buf) just writes a reloc that has been added using - cs_add_reloc. The read_domain and write_domain parameters have been removed, - because we already specify them in cs_add_reloc. + cs_add_buffer. The read_domain and write_domain parameters have been removed, + because we already specify them in cs_add_buffer. */ #include "radeon_drm_cs.h" @@ -221,7 +221,7 @@ static inline void update_reloc(struct drm_radeon_cs_reloc *reloc, reloc->flags = MAX2(reloc->flags, priority); } -int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo) +int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo) { unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1); int i = csc->reloc_indices_hashlist[hash]; @@ -235,7 +235,7 @@ int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo) if (csc->relocs_bo[i] == bo) { /* Put this reloc in the hash list. * This will prevent additional hash collisions if there are - * several consecutive get_reloc calls for the same buffer. + * several consecutive lookup_buffer calls for the same buffer. 
* * Example: Assuming buffers A,B,C collide in the hash list, * the following sequence of relocs: @@ -249,7 +249,7 @@ int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo) return -1; } -static unsigned radeon_add_reloc(struct radeon_drm_cs *cs, +static unsigned radeon_add_buffer(struct radeon_drm_cs *cs, struct radeon_bo *bo, enum radeon_bo_usage usage, enum radeon_bo_domain domains, @@ -266,13 +266,13 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs, assert(priority < 64); *added_domains = 0; - i = radeon_get_reloc(csc, bo); + i = radeon_lookup_buffer(csc, bo); if (i >= 0) { reloc = &csc->relocs[i]; update_reloc(reloc, rd, wd, priority / 4, added_domains); - /* For async DMA, every add_reloc call must add a buffer to the list + /* For async DMA, every add_buffer call must add a buffer to the list * no matter how many duplicates there are. This is due to the fact * the DMA CS checker doesn't use NOP packets for offset patching, * but always uses the i-th buffer from the list to patch the i-th @@ -319,7 +319,7 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs, return csc->crelocs++; } -static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, +static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs, struct radeon_winsys_cs_handle *buf, enum radeon_bo_usage usage, enum radeon_bo_domain domains, @@ -328,8 +328,8 @@ static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)buf; enum radeon_bo_domain added_domains; - unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, - &added_domains); + unsigned index = radeon_add_buffer(cs, bo, usage, domains, priority, + &added_domains); if (added_domains & RADEON_DOMAIN_GTT) cs->csc->used_gart += bo->base.size; @@ -339,12 +339,12 @@ static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, return index; } -static int 
radeon_drm_cs_get_reloc(struct radeon_winsys_cs *rcs, +static int radeon_drm_cs_lookup_buffer(struct radeon_winsys_cs *rcs, struct radeon_winsys_cs_handle *buf) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - return radeon_get_reloc(cs->csc, (struct radeon_bo*)buf); + return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf); } static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs) @@ -357,9 +357,9 @@ static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs) if (status) { cs->csc->validated_crelocs = cs->csc->crelocs; } else { - /* Remove lately-added relocations. The validation failed with them + /* Remove lately-added buffers. The validation failed with them * and the CS is about to be flushed because of that. Keep only - * the already-validated relocations. */ + * the already-validated buffers. */ unsigned i; for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) { @@ -608,7 +608,7 @@ static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs, if (!bo->num_cs_references) return FALSE; - index = radeon_get_reloc(cs->csc, bo); + index = radeon_lookup_buffer(cs->csc, bo); if (index == -1) return FALSE; @@ -632,7 +632,7 @@ radeon_cs_create_fence(struct radeon_winsys_cs *rcs) fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE, RADEON_DOMAIN_GTT, 0); /* Add the fence as a dummy relocation. 
*/ - cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence), + cs->ws->base.cs_add_buffer(rcs, cs->ws->base.buffer_get_cs_handle(fence), RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT, RADEON_PRIO_FENCE); return (struct pipe_fence_handle*)fence; @@ -658,8 +658,8 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) ws->base.ctx_destroy = radeon_drm_ctx_destroy; ws->base.cs_create = radeon_drm_cs_create; ws->base.cs_destroy = radeon_drm_cs_destroy; - ws->base.cs_add_reloc = radeon_drm_cs_add_reloc; - ws->base.cs_get_reloc = radeon_drm_cs_get_reloc; + ws->base.cs_add_buffer = radeon_drm_cs_add_buffer; + ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer; ws->base.cs_validate = radeon_drm_cs_validate; ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit; ws->base.cs_flush = radeon_drm_cs_flush; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 6ceb8e98ee7..f69e1cea09f 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -40,7 +40,7 @@ struct radeon_cs_context { uint32_t cs_trace_id; - /* Relocs. */ + /* Buffers. 
*/ unsigned nrelocs; unsigned crelocs; unsigned validated_crelocs; @@ -77,7 +77,7 @@ struct radeon_drm_cs { struct radeon_bo *trace_buf; }; -int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo); +int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo); static inline struct radeon_drm_cs * radeon_drm_cs(struct radeon_winsys_cs *base) @@ -91,7 +91,7 @@ radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs, { int num_refs = bo->num_cs_references; return num_refs == bo->rws->num_cs || - (num_refs && radeon_get_reloc(cs->csc, bo) != -1); + (num_refs && radeon_lookup_buffer(cs->csc, bo) != -1); } static inline boolean @@ -103,7 +103,7 @@ radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs, if (!bo->num_cs_references) return FALSE; - index = radeon_get_reloc(cs->csc, bo); + index = radeon_lookup_buffer(cs->csc, bo); if (index == -1) return FALSE; -- cgit v1.2.3 From 6f48e2bee15c484c4a4685712c6ba1f379ef6853 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 00:52:32 +0200 Subject: winsys/amdgpu: add winsys function cs_get_buffer_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For debugging. 
Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeon/radeon_winsys.h | 16 ++++++++++++++++ src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 19 +++++++++++++++++++ src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 1 + 3 files changed, 36 insertions(+) diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 3049852c5a5..b91e1adf41d 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -368,6 +368,12 @@ struct radeon_surf { uint32_t num_banks; }; +struct radeon_bo_list_item { + struct pb_buffer *buf; + uint64_t vm_address; + uint64_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */ +}; + struct radeon_winsys { /** * The screen object this winsys was created for @@ -641,6 +647,16 @@ struct radeon_winsys { */ boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt); + /** + * Return the buffer list. + * + * \param cs Command stream + * \param list Returned buffer list. Set to NULL to query the count only. + * \return The buffer count. + */ + unsigned (*cs_get_buffer_list)(struct radeon_winsys_cs *cs, + struct radeon_bo_list_item *list); + /** * Flush a command stream. 
* diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 19a20048470..48f76cfe8af 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -416,6 +416,7 @@ static unsigned amdgpu_add_buffer(struct amdgpu_cs *cs, if (i >= 0) { buffer = &cs->buffers[i]; + buffer->priority_usage |= 1llu << priority; buffer->usage |= usage; *added_domains = domains & ~buffer->domains; buffer->domains |= domains; @@ -445,6 +446,7 @@ static unsigned amdgpu_add_buffer(struct amdgpu_cs *cs, p_atomic_inc(&bo->num_cs_references); buffer = &cs->buffers[cs->num_buffers]; buffer->bo = bo; + buffer->priority_usage = 1llu << priority; buffer->usage = usage; buffer->domains = domains; @@ -500,6 +502,22 @@ static boolean amdgpu_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64 return status; } +static unsigned amdgpu_cs_get_buffer_list(struct radeon_winsys_cs *rcs, + struct radeon_bo_list_item *list) +{ + struct amdgpu_cs *cs = amdgpu_cs(rcs); + int i; + + if (list) { + for (i = 0; i < cs->num_buffers; i++) { + pb_reference(&list[i].buf, &cs->buffers[i].bo->base); + list[i].vm_address = cs->buffers[i].bo->va; + list[i].priority_usage = cs->buffers[i].priority_usage; + } + } + return cs->num_buffers; +} + static void amdgpu_cs_do_submission(struct amdgpu_cs *cs, struct pipe_fence_handle **out_fence) { @@ -686,6 +704,7 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws) ws->base.cs_lookup_buffer = amdgpu_cs_lookup_buffer; ws->base.cs_validate = amdgpu_cs_validate; ws->base.cs_memory_below_limit = amdgpu_cs_memory_below_limit; + ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list; ws->base.cs_flush = amdgpu_cs_flush; ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced; ws->base.cs_sync_flush = amdgpu_cs_sync_flush; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index 1955fe2ed44..bae5d73bb3c 100644 --- 
a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -45,6 +45,7 @@ struct amdgpu_ctx { struct amdgpu_cs_buffer { struct amdgpu_winsys_bo *bo; + uint64_t priority_usage; enum radeon_bo_usage usage; enum radeon_bo_domain domains; }; -- cgit v1.2.3 From eb55610c89af669f442418f32df6df60fc412867 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 00:58:15 +0200 Subject: winsys/radeon: implement cs_get_buffer_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is more complicated, because tracking priority_usage needed changing the relocs_bo type. Reviewed-by: Michel Dänzer --- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 45 +++++++++++++++------- src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 8 +++- src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c | 16 ++++---- 3 files changed, 47 insertions(+), 22 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 6e707b699c4..32b56f989cd 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -99,8 +99,8 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc, csc->fd = ws->fd; csc->nrelocs = 512; - csc->relocs_bo = (struct radeon_bo**) - CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*)); + csc->relocs_bo = (struct radeon_bo_item*) + CALLOC(1, csc->nrelocs * sizeof(csc->relocs_bo[0])); if (!csc->relocs_bo) { return FALSE; } @@ -139,8 +139,8 @@ static void radeon_cs_context_cleanup(struct radeon_cs_context *csc) unsigned i; for (i = 0; i < csc->crelocs; i++) { - p_atomic_dec(&csc->relocs_bo[i]->num_cs_references); - radeon_bo_reference(&csc->relocs_bo[i], NULL); + p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references); + radeon_bo_reference(&csc->relocs_bo[i].bo, NULL); } csc->crelocs = 0; @@ -227,12 +227,12 @@ int radeon_lookup_buffer(struct radeon_cs_context *csc, struct 
radeon_bo *bo) int i = csc->reloc_indices_hashlist[hash]; /* not found or found */ - if (i == -1 || csc->relocs_bo[i] == bo) + if (i == -1 || csc->relocs_bo[i].bo == bo) return i; /* Hash collision, look for the BO in the list of relocs linearly. */ for (i = csc->crelocs - 1; i >= 0; i--) { - if (csc->relocs_bo[i] == bo) { + if (csc->relocs_bo[i].bo == bo) { /* Put this reloc in the hash list. * This will prevent additional hash collisions if there are * several consecutive lookup_buffer calls for the same buffer. @@ -271,6 +271,7 @@ static unsigned radeon_add_buffer(struct radeon_drm_cs *cs, if (i >= 0) { reloc = &csc->relocs[i]; update_reloc(reloc, rd, wd, priority / 4, added_domains); + csc->relocs_bo[i].priority_usage |= 1llu << priority; /* For async DMA, every add_buffer call must add a buffer to the list * no matter how many duplicates there are. This is due to the fact @@ -292,7 +293,7 @@ static unsigned radeon_add_buffer(struct radeon_drm_cs *cs, uint32_t size; csc->nrelocs += 10; - size = csc->nrelocs * sizeof(struct radeon_bo*); + size = csc->nrelocs * sizeof(csc->relocs_bo[0]); csc->relocs_bo = realloc(csc->relocs_bo, size); size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc); @@ -302,8 +303,9 @@ static unsigned radeon_add_buffer(struct radeon_drm_cs *cs, } /* Initialize the new relocation. 
*/ - csc->relocs_bo[csc->crelocs] = NULL; - radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo); + csc->relocs_bo[csc->crelocs].bo = NULL; + csc->relocs_bo[csc->crelocs].priority_usage = 1llu << priority; + radeon_bo_reference(&csc->relocs_bo[csc->crelocs].bo, bo); p_atomic_inc(&bo->num_cs_references); reloc = &csc->relocs[csc->crelocs]; reloc->handle = bo->handle; @@ -363,8 +365,8 @@ static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs) unsigned i; for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) { - p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references); - radeon_bo_reference(&cs->csc->relocs_bo[i], NULL); + p_atomic_dec(&cs->csc->relocs_bo[i].bo->num_cs_references); + radeon_bo_reference(&cs->csc->relocs_bo[i].bo, NULL); } cs->csc->crelocs = cs->csc->validated_crelocs; @@ -398,6 +400,22 @@ static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, ui return gtt < cs->ws->info.gart_size * 0.7; } +static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs, + struct radeon_bo_list_item *list) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + int i; + + if (list) { + for (i = 0; i < cs->csc->crelocs; i++) { + pb_reference(&list[i].buf, &cs->csc->relocs_bo[i].bo->base); + list[i].vm_address = cs->csc->relocs_bo[i].bo->va; + list[i].priority_usage = cs->csc->relocs_bo[i].priority_usage; + } + } + return cs->csc->crelocs; +} + void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc) { unsigned i; @@ -426,7 +444,7 @@ void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs } for (i = 0; i < csc->crelocs; i++) - p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls); + p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls); radeon_cs_context_cleanup(csc); } @@ -514,7 +532,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, for (i = 0; i < crelocs; i++) { /* Update the number of active asynchronous CS ioctls for the buffer. 
*/ - p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls); + p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls); } switch (cs->base.ring_type) { @@ -662,6 +680,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer; ws->base.cs_validate = radeon_drm_cs_validate; ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit; + ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list; ws->base.cs_flush = radeon_drm_cs_flush; ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; ws->base.cs_sync_flush = radeon_drm_cs_sync_flush; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index f69e1cea09f..81f66f56d99 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -29,6 +29,11 @@ #include "radeon_drm_bo.h" +struct radeon_bo_item { + struct radeon_bo *bo; + uint64_t priority_usage; +}; + struct radeon_cs_context { uint32_t buf[16 * 1024]; @@ -44,8 +49,9 @@ struct radeon_cs_context { unsigned nrelocs; unsigned crelocs; unsigned validated_crelocs; - struct radeon_bo **relocs_bo; + struct radeon_bo_item *relocs_bo; struct drm_radeon_cs_reloc *relocs; + uint64_t *priority_usage; int reloc_indices_hashlist[512]; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c index 09665df3048..99585956a49 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c @@ -54,7 +54,7 @@ void radeon_dump_cs_on_lockup(struct radeon_drm_cs *cs, struct radeon_cs_context } memset(&args, 0, sizeof(args)); - args.handle = csc->relocs_bo[0]->handle; + args.handle = csc->relocs_bo[0].bo->handle; for (i = 0; i < RADEON_CS_DUMP_AFTER_MS_TIMEOUT; i++) { usleep(1); lockup = drmCommandWriteRead(csc->fd, DRM_RADEON_GEM_BUSY, &args, sizeof(args)); @@ -94,15 +94,15 @@ void 
radeon_dump_cs_on_lockup(struct radeon_drm_cs *cs, struct radeon_cs_context fprintf(dump, "\n"); for (i = 0; i < csc->crelocs; i++) { - unsigned j, ndw = (csc->relocs_bo[i]->base.size + 3) >> 2; + unsigned j, ndw = (csc->relocs_bo[i].bo->base.size + 3) >> 2; - ptr = radeon_bo_do_map(csc->relocs_bo[i]); + ptr = radeon_bo_do_map(csc->relocs_bo[i].bo); if (ptr) { fprintf(dump, "static uint32_t bo_%04d_data[%d] = {\n ", i, ndw); for (j = 0; j < ndw; j++) { if (j && !(j % 8)) { uint32_t offset = (j - 8) << 2; - fprintf(dump, " /* [0x%08x] va[0x%016"PRIx64"] */\n ", offset, offset + csc->relocs_bo[i]->va); + fprintf(dump, " /* [0x%08x] va[0x%016"PRIx64"] */\n ", offset, offset + csc->relocs_bo[i].bo->va); } fprintf(dump, " 0x%08x,", ptr[j]); } @@ -139,16 +139,16 @@ void radeon_dump_cs_on_lockup(struct radeon_drm_cs *cs, struct radeon_cs_context fprintf(dump, "\n"); for (i = 0; i < csc->crelocs; i++) { - unsigned ndw = (csc->relocs_bo[i]->base.size + 3) >> 2; + unsigned ndw = (csc->relocs_bo[i].bo->base.size + 3) >> 2; uint32_t *ptr; - ptr = radeon_bo_do_map(csc->relocs_bo[i]); + ptr = radeon_bo_do_map(csc->relocs_bo[i].bo); if (ptr) { fprintf(dump, " bo[%d] = bo_new(&ctx, %d, bo_%04d_data, 0x%016"PRIx64", 0x%08x);\n", - i, ndw, i, csc->relocs_bo[i]->va, csc->relocs_bo[i]->base.alignment); + i, ndw, i, csc->relocs_bo[i].bo->va, csc->relocs_bo[i].bo->base.alignment); } else { fprintf(dump, " bo[%d] = bo_new(&ctx, %d, NULL, 0x%016"PRIx64", 0x%08x);\n", - i, ndw, csc->relocs_bo[i]->va, csc->relocs_bo[i]->base.alignment); + i, ndw, csc->relocs_bo[i].bo->va, csc->relocs_bo[i].bo->base.alignment); } } fprintf(dump, "\n"); -- cgit v1.2.3 From cc92b9037507ccfb498bdcec27b4d186e230004f Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 01:38:48 +0200 Subject: radeonsi: dump buffer lists while debugging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_debug.c | 110 
+++++++++++++++++++++++++++ src/gallium/drivers/radeonsi/si_hw_context.c | 15 +++- src/gallium/drivers/radeonsi/si_pipe.c | 5 ++ src/gallium/drivers/radeonsi/si_pipe.h | 2 + 4 files changed, 131 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 3d127236831..7d41e8d00e0 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -420,6 +420,114 @@ static void si_dump_last_ib(struct si_context *sctx, FILE *f) r600_resource_reference(&sctx->last_trace_buf, NULL); } +static const char *priority_to_string(enum radeon_bo_priority priority) +{ +#define ITEM(x) [RADEON_PRIO_##x] = #x + static const char *table[64] = { + ITEM(FENCE), + ITEM(TRACE), + ITEM(SO_FILLED_SIZE), + ITEM(QUERY), + ITEM(IB1), + ITEM(IB2), + ITEM(DRAW_INDIRECT), + ITEM(INDEX_BUFFER), + ITEM(CP_DMA), + ITEM(VCE), + ITEM(UVD), + ITEM(SDMA_BUFFER), + ITEM(SDMA_TEXTURE), + ITEM(USER_SHADER), + ITEM(INTERNAL_SHADER), + ITEM(CONST_BUFFER), + ITEM(DESCRIPTORS), + ITEM(BORDER_COLORS), + ITEM(SAMPLER_BUFFER), + ITEM(VERTEX_BUFFER), + ITEM(SHADER_RW_BUFFER), + ITEM(RINGS_STREAMOUT), + ITEM(SCRATCH_BUFFER), + ITEM(COMPUTE_GLOBAL), + ITEM(SAMPLER_TEXTURE), + ITEM(SHADER_RW_IMAGE), + ITEM(SAMPLER_TEXTURE_MSAA), + ITEM(COLOR_BUFFER), + ITEM(DEPTH_BUFFER), + ITEM(COLOR_BUFFER_MSAA), + ITEM(DEPTH_BUFFER_MSAA), + ITEM(CMASK), + ITEM(DCC), + ITEM(HTILE), + }; +#undef ITEM + + assert(priority < ARRAY_SIZE(table)); + return table[priority]; +} + +static int bo_list_compare_va(const struct radeon_bo_list_item *a, + const struct radeon_bo_list_item *b) +{ + return a->vm_address < b->vm_address ? -1 : + a->vm_address > b->vm_address ? 1 : 0; +} + +static void si_dump_last_bo_list(struct si_context *sctx, FILE *f) +{ + unsigned i,j; + + if (!sctx->last_bo_list) + return; + + /* Sort the list according to VM addresses first. 
*/ + qsort(sctx->last_bo_list, sctx->last_bo_count, + sizeof(sctx->last_bo_list[0]), (void*)bo_list_compare_va); + + fprintf(f, "Buffer list (in units of pages = 4kB):\n" + COLOR_YELLOW " Size VM start page " + "VM end page Usage" COLOR_RESET "\n"); + + for (i = 0; i < sctx->last_bo_count; i++) { + /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */ + const unsigned page_size = 4096; + uint64_t va = sctx->last_bo_list[i].vm_address; + uint64_t size = sctx->last_bo_list[i].buf->size; + bool hit = false; + + /* If there's unused virtual memory between 2 buffers, print it. */ + if (i) { + uint64_t previous_va_end = sctx->last_bo_list[i-1].vm_address + + sctx->last_bo_list[i-1].buf->size; + + if (va > previous_va_end) { + fprintf(f, " %10"PRIu64" -- hole --\n", + (va - previous_va_end) / page_size); + } + } + + /* Print the buffer. */ + fprintf(f, " %10"PRIu64" 0x%013"PRIx64" 0x%013"PRIx64" ", + size / page_size, va / page_size, (va + size) / page_size); + + /* Print the usage. */ + for (j = 0; j < 64; j++) { + if (!(sctx->last_bo_list[i].priority_usage & (1llu << j))) + continue; + + fprintf(f, "%s%s", !hit ? 
"" : ", ", priority_to_string(j)); + hit = true; + } + fprintf(f, "\n"); + } + fprintf(f, "\nNote: The holes represent memory not used by the IB.\n" + " Other buffers can still be allocated there.\n\n"); + + for (i = 0; i < sctx->last_bo_count; i++) + pb_reference(&sctx->last_bo_list[i].buf, NULL); + free(sctx->last_bo_list); + sctx->last_bo_list = NULL; +} + static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, unsigned flags) { @@ -434,6 +542,7 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, si_dump_shader(sctx->gs_shader, "Geometry", f); si_dump_shader(sctx->ps_shader, "Fragment", f); + si_dump_last_bo_list(sctx, f); si_dump_last_ib(sctx, f); fprintf(f, "Done.\n"); @@ -538,6 +647,7 @@ void si_check_vm_faults(struct si_context *sctx) fprintf(f, "Device name: %s\n\n", screen->get_name(screen)); fprintf(f, "Failing VM page: 0x%08x\n\n", addr); + si_dump_last_bo_list(sctx, f); si_dump_last_ib(sctx, f); fclose(f); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index de95d12f000..17d89d16e24 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -85,14 +85,27 @@ void si_context_gfx_flush(void *context, unsigned flags, if (ctx->trace_buf) si_trace_emit(ctx); - /* Save the IB for debug contexts. */ if (ctx->is_debug) { + unsigned i; + + /* Save the IB for debug contexts. */ free(ctx->last_ib); ctx->last_ib_dw_size = cs->cdw; ctx->last_ib = malloc(cs->cdw * 4); memcpy(ctx->last_ib, cs->buf, cs->cdw * 4); r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf); r600_resource_reference(&ctx->trace_buf, NULL); + + /* Save the buffer list. 
*/ + if (ctx->last_bo_list) { + for (i = 0; i < ctx->last_bo_count; i++) + pb_reference(&ctx->last_bo_list[i].buf, NULL); + free(ctx->last_bo_list); + } + ctx->last_bo_count = ws->cs_get_buffer_list(cs, NULL); + ctx->last_bo_list = calloc(ctx->last_bo_count, + sizeof(ctx->last_bo_list[0])); + ws->cs_get_buffer_list(cs, ctx->last_bo_list); } /* Flush the CS. */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 5a2b60620e3..cdd33aa0831 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -81,6 +81,11 @@ static void si_destroy_context(struct pipe_context *context) r600_resource_reference(&sctx->trace_buf, NULL); r600_resource_reference(&sctx->last_trace_buf, NULL); free(sctx->last_ib); + if (sctx->last_bo_list) { + for (i = 0; i < sctx->last_bo_count; i++) + pb_reference(&sctx->last_bo_list[i].buf, NULL); + free(sctx->last_bo_list); + } FREE(sctx); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 1c26022bb1b..41b2832322c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -277,6 +277,8 @@ struct si_context { struct r600_resource *trace_buf; unsigned trace_id; uint64_t dmesg_timestamp; + unsigned last_bo_count; + struct radeon_bo_list_item *last_bo_list; }; /* cik_sdma.c */ -- cgit v1.2.3 From 5804c6adf8361adb5d04feaf578dffbf840475d6 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 6 Sep 2015 17:35:06 +0200 Subject: gallium/radeon: add separate stencil level dirty flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will only do depth-only or stencil-only decompress blits, whichever is needed by textures, instead of always doing both. 
Reviewed-by: Michel Dänzer --- src/gallium/drivers/r600/evergreen_state.c | 4 ++-- src/gallium/drivers/r600/r600_state_common.c | 3 +++ src/gallium/drivers/radeon/r600_pipe_common.h | 1 + src/gallium/drivers/radeonsi/cik_sdma.c | 2 +- src/gallium/drivers/radeonsi/si_dma.c | 4 ++-- src/gallium/drivers/radeonsi/si_state_draw.c | 3 +++ 6 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index a5caa0dac2b..6169c5b719b 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -3375,11 +3375,11 @@ static void evergreen_dma_copy(struct pipe_context *ctx, } if (src->format != dst->format || src_box->depth > 1 || - rdst->dirty_level_mask != 0) { + (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level)) { goto fallback; } - if (rsrc->dirty_level_mask) { + if (rsrc->dirty_level_mask & (1 << src_level)) { ctx->flush_resource(ctx, src); } diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index a16f1c25dcb..178005a8574 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1779,6 +1779,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info struct r600_texture *rtex = (struct r600_texture *)surf->texture; rtex->dirty_level_mask |= 1 << surf->u.tex.level; + + if (rtex->surface.flags & RADEON_SURF_SBUFFER) + rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level; } if (rctx->framebuffer.compressed_cb_mask) { struct pipe_surface *surf; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 0f9b91af315..b58b500bd76 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -205,6 +205,7 @@ struct r600_texture { unsigned pitch_override; bool is_depth; unsigned 
dirty_level_mask; /* each bit says if that mipmap is compressed */ + unsigned stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */ struct r600_texture *flushed_depth_texture; boolean is_flushing_texture; struct radeon_surf surface; diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index 691d379bccd..6454b8ce8c0 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -242,7 +242,7 @@ void cik_sdma_copy(struct pipe_context *ctx, if (src->format != dst->format || rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 || - rdst->dirty_level_mask & (1 << dst_level)) { + (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level)) { goto fallback; } diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c index 3d980fb67b8..31b0b41e5a4 100644 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ b/src/gallium/drivers/radeonsi/si_dma.c @@ -246,13 +246,13 @@ void si_dma_copy(struct pipe_context *ctx, goto fallback; if (src->format != dst->format || src_box->depth > 1 || - rdst->dirty_level_mask != 0 || + (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) || rdst->cmask.size || rdst->fmask.size || rsrc->cmask.size || rsrc->fmask.size) { goto fallback; } - if (rsrc->dirty_level_mask) { + if (rsrc->dirty_level_mask & (1 << src_level)) { ctx->flush_resource(ctx, src); } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index fb65eb3ce2d..43170ec446b 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -858,6 +858,9 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) struct r600_texture *rtex = (struct r600_texture *)surf->texture; rtex->dirty_level_mask |= 1 << surf->u.tex.level; + + if (rtex->surface.flags & RADEON_SURF_SBUFFER) + 
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level; } if (sctx->framebuffer.compressed_cb_mask) { struct pipe_surface *surf; -- cgit v1.2.3 From c23c92c965f72f9a0b160834d06a2d631b736081 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 6 Sep 2015 17:37:38 +0200 Subject: radeonsi: only do depth-only or stencil-only in-place decompression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit instead of always doing both. Usually, only depth is needed, so stencil decompression is useless. Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_blit.c | 28 ++++++++++++++++++++++------ src/gallium/drivers/radeonsi/si_pipe.h | 4 +++- src/gallium/drivers/radeonsi/si_state.c | 12 +++++++++--- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 93fa67a953e..d5c5db30029 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -180,19 +180,27 @@ static void si_blit_decompress_depth(struct pipe_context *ctx, static void si_blit_decompress_depth_in_place(struct si_context *sctx, struct r600_texture *texture, + bool is_stencil_sampler, unsigned first_level, unsigned last_level, unsigned first_layer, unsigned last_layer) { struct pipe_surface *zsurf, surf_tmpl = {{0}}; unsigned layer, max_layer, checked_last_layer, level; - - sctx->db_inplace_flush_enabled = true; + unsigned *dirty_level_mask; + + if (is_stencil_sampler) { + sctx->db_flush_stencil_inplace = true; + dirty_level_mask = &texture->stencil_dirty_level_mask; + } else { + sctx->db_flush_depth_inplace = true; + dirty_level_mask = &texture->dirty_level_mask; + } si_mark_atom_dirty(sctx, &sctx->db_render_state); surf_tmpl.format = texture->resource.b.b.format; for (level = first_level; level <= last_level; level++) { - if (!(texture->dirty_level_mask & (1 << level))) + if (!(*dirty_level_mask & (1 << level))) continue; 
surf_tmpl.u.tex.level = level; @@ -220,11 +228,12 @@ static void si_blit_decompress_depth_in_place(struct si_context *sctx, /* The texture will always be dirty if some layers aren't flushed. * I don't think this case occurs often though. */ if (first_layer == 0 && last_layer == max_layer) { - texture->dirty_level_mask &= ~(1 << level); + *dirty_level_mask &= ~(1 << level); } } - sctx->db_inplace_flush_enabled = false; + sctx->db_flush_depth_inplace = false; + sctx->db_flush_stencil_inplace = false; si_mark_atom_dirty(sctx, &sctx->db_render_state); } @@ -236,17 +245,20 @@ void si_flush_depth_textures(struct si_context *sctx, while (mask) { struct pipe_sampler_view *view; + struct si_sampler_view *sview; struct r600_texture *tex; i = u_bit_scan(&mask); view = textures->views.views[i]; assert(view); + sview = (struct si_sampler_view*)view; tex = (struct r600_texture *)view->texture; assert(tex->is_depth && !tex->is_flushing_texture); si_blit_decompress_depth_in_place(sctx, tex, + sview->is_stencil_sampler, view->u.tex.first_level, view->u.tex.last_level, 0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level)); } @@ -436,9 +448,13 @@ static void si_decompress_subresource(struct pipe_context *ctx, struct r600_texture *rtex = (struct r600_texture*)tex; if (rtex->is_depth && !rtex->is_flushing_texture) { - si_blit_decompress_depth_in_place(sctx, rtex, + si_blit_decompress_depth_in_place(sctx, rtex, false, level, level, first_layer, last_layer); + if (rtex->surface.flags & RADEON_SURF_SBUFFER) + si_blit_decompress_depth_in_place(sctx, rtex, true, + level, level, + first_layer, last_layer); } else if (rtex->fmask.size || rtex->cmask.size) { si_blit_decompress_color(ctx, rtex, level, level, first_layer, last_layer); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 41b2832322c..a882d36e170 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -100,6 +100,7 @@ struct 
si_sampler_view { * [4..7] = buffer descriptor */ uint32_t state[8]; uint32_t fmask_state[8]; + bool is_stencil_sampler; }; struct si_sampler_state { @@ -237,7 +238,8 @@ struct si_context { bool dbcb_depth_copy_enabled; bool dbcb_stencil_copy_enabled; unsigned dbcb_copy_sample; - bool db_inplace_flush_enabled; + bool db_flush_depth_inplace; + bool db_flush_stencil_inplace; bool db_depth_clear; bool db_depth_disable_expclear; unsigned ps_db_shader_control; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 5d4e579b392..85074bdbf5b 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -998,10 +998,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | S_028000_COPY_CENTROID(1) | S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample)); - } else if (sctx->db_inplace_flush_enabled) { + } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) { radeon_emit(cs, - S_028000_DEPTH_COMPRESS_DISABLE(1) | - S_028000_STENCIL_COMPRESS_DISABLE(1)); + S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | + S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace)); } else if (sctx->db_depth_clear) { radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1)); } else { @@ -2411,6 +2411,12 @@ si_create_sampler_view_custom(struct pipe_context *ctx, pipe_resource_reference(&view->base.texture, texture); view->resource = &tmp->resource; + if (state->format == PIPE_FORMAT_X24S8_UINT || + state->format == PIPE_FORMAT_S8X24_UINT || + state->format == PIPE_FORMAT_X32_S8X24_UINT || + state->format == PIPE_FORMAT_S8_UINT) + view->is_stencil_sampler = true; + /* Buffer resource. 
*/ if (texture->target == PIPE_BUFFER) { unsigned stride, num_records; -- cgit v1.2.3 From 27b102e7fdbcd2beedc815996e1b5fcb2b612206 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 6 Sep 2015 17:37:38 +0200 Subject: r600g: only do depth-only or stencil-only in-place decompression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit instead of always doing both. Usually, only depth is needed, so stencil decompression is useless. Reviewed-by: Michel Dänzer --- src/gallium/drivers/r600/evergreen_state.c | 12 +++++++++--- src/gallium/drivers/r600/r600_blit.c | 27 ++++++++++++++++++++++----- src/gallium/drivers/r600/r600_pipe.h | 4 +++- src/gallium/drivers/r600/r600_state.c | 12 +++++++++--- 4 files changed, 43 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 6169c5b719b..c6702a9ca34 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -783,6 +783,12 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, va = tmp->resource.gpu_address; + if (state->format == PIPE_FORMAT_X24S8_UINT || + state->format == PIPE_FORMAT_S8X24_UINT || + state->format == PIPE_FORMAT_X32_S8X24_UINT || + state->format == PIPE_FORMAT_S8_UINT) + view->is_stencil_sampler = true; + view->tex_resource = &tmp->resource; view->tex_resource_words[0] = (S_030000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) | S_030000_PITCH((pitch / 8) - 1) | @@ -1823,9 +1829,9 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_ S_028000_STENCIL_COPY_ENABLE(a->copy_stencil) | S_028000_COPY_CENTROID(1) | S_028000_COPY_SAMPLE(a->copy_sample); - } else if (a->flush_depthstencil_in_place) { - db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(1) | - S_028000_STENCIL_COMPRESS_DISABLE(1); + } else if (a->flush_depth_inplace || a->flush_stencil_inplace) { + db_render_control |= 
S_028000_DEPTH_COMPRESS_DISABLE(a->flush_depth_inplace) | + S_028000_STENCIL_COMPRESS_DISABLE(a->flush_stencil_inplace); db_render_override |= S_02800C_DISABLE_PIXEL_RATE_TILES(1); } if (a->htile_clear) { diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index d1370cd8f26..aede8408446 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -202,20 +202,28 @@ static void r600_blit_decompress_depth(struct pipe_context *ctx, static void r600_blit_decompress_depth_in_place(struct r600_context *rctx, struct r600_texture *texture, + bool is_stencil_sampler, unsigned first_level, unsigned last_level, unsigned first_layer, unsigned last_layer) { struct pipe_surface *zsurf, surf_tmpl = {{0}}; unsigned layer, max_layer, checked_last_layer, level; + unsigned *dirty_level_mask; /* Enable decompression in DB_RENDER_CONTROL */ - rctx->db_misc_state.flush_depthstencil_in_place = true; + if (is_stencil_sampler) { + rctx->db_misc_state.flush_stencil_inplace = true; + dirty_level_mask = &texture->stencil_dirty_level_mask; + } else { + rctx->db_misc_state.flush_depth_inplace = true; + dirty_level_mask = &texture->dirty_level_mask; + } r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); surf_tmpl.format = texture->resource.b.b.format; for (level = first_level; level <= last_level; level++) { - if (!(texture->dirty_level_mask & (1 << level))) + if (!(*dirty_level_mask & (1 << level))) continue; surf_tmpl.u.tex.level = level; @@ -242,12 +250,13 @@ static void r600_blit_decompress_depth_in_place(struct r600_context *rctx, /* The texture will always be dirty if some layers or samples aren't flushed. * I don't think this case occurs often though. 
*/ if (first_layer == 0 && last_layer == max_layer) { - texture->dirty_level_mask &= ~(1 << level); + *dirty_level_mask &= ~(1 << level); } } /* Disable decompression in DB_RENDER_CONTROL */ - rctx->db_misc_state.flush_depthstencil_in_place = false; + rctx->db_misc_state.flush_depth_inplace = false; + rctx->db_misc_state.flush_stencil_inplace = false; r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } @@ -259,12 +268,14 @@ void r600_decompress_depth_textures(struct r600_context *rctx, while (depth_texture_mask) { struct pipe_sampler_view *view; + struct r600_pipe_sampler_view *rview; struct r600_texture *tex; i = u_bit_scan(&depth_texture_mask); view = &textures->views[i]->base; assert(view); + rview = (struct r600_pipe_sampler_view*)view; tex = (struct r600_texture *)view->texture; assert(tex->is_depth && !tex->is_flushing_texture); @@ -272,6 +283,7 @@ void r600_decompress_depth_textures(struct r600_context *rctx, if (rctx->b.chip_class >= EVERGREEN || r600_can_read_depth(tex)) { r600_blit_decompress_depth_in_place(rctx, tex, + rview->is_stencil_sampler, view->u.tex.first_level, view->u.tex.last_level, 0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level)); } else { @@ -367,9 +379,14 @@ static bool r600_decompress_subresource(struct pipe_context *ctx, if (rtex->is_depth && !rtex->is_flushing_texture) { if (rctx->b.chip_class >= EVERGREEN || r600_can_read_depth(rtex)) { - r600_blit_decompress_depth_in_place(rctx, rtex, + r600_blit_decompress_depth_in_place(rctx, rtex, false, level, level, first_layer, last_layer); + if (rtex->surface.flags & RADEON_SURF_SBUFFER) { + r600_blit_decompress_depth_in_place(rctx, rtex, true, + level, level, + first_layer, last_layer); + } } else { if (!r600_init_flushed_depth_texture(ctx, tex, NULL)) return false; /* error */ diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index d0774de8573..520b03f605d 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ 
b/src/gallium/drivers/r600/r600_pipe.h @@ -109,7 +109,8 @@ struct r600_db_misc_state { struct r600_atom atom; bool occlusion_query_enabled; bool flush_depthstencil_through_cb; - bool flush_depthstencil_in_place; + bool flush_depth_inplace; + bool flush_stencil_inplace; bool copy_depth, copy_stencil; unsigned copy_sample; unsigned log_samples; @@ -253,6 +254,7 @@ struct r600_pipe_sampler_view { struct r600_resource *tex_resource; uint32_t tex_resource_words[8]; bool skip_mip_address_reloc; + bool is_stencil_sampler; }; struct r600_rasterizer_state { diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 4b171894f5c..1be3e1b4de5 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -710,6 +710,12 @@ r600_create_sampler_view_custom(struct pipe_context *ctx, break; } + if (state->format == PIPE_FORMAT_X24S8_UINT || + state->format == PIPE_FORMAT_S8X24_UINT || + state->format == PIPE_FORMAT_X32_S8X24_UINT || + state->format == PIPE_FORMAT_S8_UINT) + view->is_stencil_sampler = true; + view->tex_resource = &tmp->resource; view->tex_resource_words[0] = (S_038000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) | S_038000_TILE_MODE(array_mode) | @@ -1659,9 +1665,9 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom if (rctx->b.family == CHIP_RV610 || rctx->b.family == CHIP_RV630 || rctx->b.family == CHIP_RV620 || rctx->b.family == CHIP_RV635) db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE); - } else if (a->flush_depthstencil_in_place) { - db_render_control |= S_028D0C_DEPTH_COMPRESS_DISABLE(1) | - S_028D0C_STENCIL_COMPRESS_DISABLE(1); + } else if (a->flush_depth_inplace || a->flush_stencil_inplace) { + db_render_control |= S_028D0C_DEPTH_COMPRESS_DISABLE(a->flush_depth_inplace) | + S_028D0C_STENCIL_COMPRESS_DISABLE(a->flush_stencil_inplace); db_render_override |= S_028D10_NOOP_CULL_DISABLE(1); } if (a->htile_clear) { -- cgit 
v1.2.3 From b78336085bce4d1f36c8d9c72fd3fa41643e3cd1 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 25 Sep 2015 22:44:41 +0200 Subject: st/dri: don't use _ctx in client_wait_sync Not needed and it can be NULL. v2: fix dri2_get_fence_from_cl_event - thanks Albert Cc: 10.6 11.0 Reviewed-by: Albert Freeman --- src/gallium/state_trackers/dri/dri2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c index 91b443147d6..712203b9db9 100644 --- a/src/gallium/state_trackers/dri/dri2.c +++ b/src/gallium/state_trackers/dri/dri2.c @@ -1293,6 +1293,7 @@ dri2_load_opencl_interop(struct dri_screen *screen) } struct dri2_fence { + struct dri_screen *driscreen; struct pipe_fence_handle *pipe_fence; void *cl_event; }; @@ -1313,6 +1314,7 @@ dri2_create_fence(__DRIcontext *_ctx) return NULL; } + fence->driscreen = dri_screen(_ctx->driScreenPriv); return fence; } @@ -1336,6 +1338,7 @@ dri2_get_fence_from_cl_event(__DRIscreen *_screen, intptr_t cl_event) return NULL; } + fence->driscreen = driscreen; return fence; } @@ -1360,9 +1363,9 @@ static GLboolean dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags, uint64_t timeout) { - struct dri_screen *driscreen = dri_screen(_ctx->driScreenPriv); - struct pipe_screen *screen = driscreen->base.screen; struct dri2_fence *fence = (struct dri2_fence*)_fence; + struct dri_screen *driscreen = fence->driscreen; + struct pipe_screen *screen = driscreen->base.screen; /* No need to flush. The context was flushed when the fence was created. */ -- cgit v1.2.3 From 18123a732b8593bb7add03d1529ace464f46a7ac Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 25 Sep 2015 22:48:00 +0200 Subject: egl/dri2: don't require a context for ClientWaitSync (v2) The spec doesn't require it. This fixes a crash on Android. 
v2: don't set any flags if ctx == NULL v3: add the spec note Cc: 10.6 11.0 Reviewed-by: Albert Freeman Reviewed-by: Frank Binns --- src/egl/drivers/dri2/egl_dri2.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 1740ee3dc47..53f21a8eab1 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -2424,13 +2424,18 @@ dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, unsigned wait_flags = 0; EGLint ret = EGL_CONDITION_SATISFIED_KHR; - if (flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR) + /* The EGL_KHR_fence_sync spec states: + * + * "If no context is current for the bound API, + * the EGL_SYNC_FLUSH_COMMANDS_BIT_KHR bit is ignored." + */ + if (dri2_ctx && flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR) wait_flags |= __DRI2_FENCE_FLAG_FLUSH_COMMANDS; /* the sync object should take a reference while waiting */ dri2_egl_ref_sync(dri2_sync); - if (dri2_dpy->fence->client_wait_sync(dri2_ctx->dri_context, + if (dri2_dpy->fence->client_wait_sync(dri2_ctx ? dri2_ctx->dri_context : NULL, dri2_sync->fence, wait_flags, timeout)) dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR; -- cgit v1.2.3 From 95e03033127f9b93b49301adbf5d4b6ddccfa931 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.DeleteShader Nothing overrides it. 
Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/glsl/linker.cpp | 8 ++++---- src/glsl/standalone_scaffolding.cpp | 8 ++++++++ src/glsl/standalone_scaffolding.h | 3 +++ src/mesa/main/dd.h | 1 - src/mesa/main/shaderobj.c | 8 +++----- src/mesa/main/shaderobj.h | 3 +++ src/mesa/main/shared.c | 2 +- 7 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index dbf300ac691..826a1881baf 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2132,7 +2132,7 @@ link_intrastage_shaders(void *mem_ctx, if (!ok) { - ctx->Driver.DeleteShader(ctx, linked); + _mesa_delete_shader(ctx, linked); return NULL; } @@ -3732,7 +3732,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) { if (prog->_LinkedShaders[i] != NULL) - ctx->Driver.DeleteShader(ctx, prog->_LinkedShaders[i]); + _mesa_delete_shader(ctx, prog->_LinkedShaders[i]); prog->_LinkedShaders[i] = NULL; } @@ -3747,7 +3747,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (!prog->LinkStatus) { if (sh) - ctx->Driver.DeleteShader(ctx, sh); + _mesa_delete_shader(ctx, sh); goto done; } @@ -3770,7 +3770,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) } if (!prog->LinkStatus) { if (sh) - ctx->Driver.DeleteShader(ctx, sh); + _mesa_delete_shader(ctx, sh); goto done; } diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp index ea9334fd7b7..f08e2d53506 100644 --- a/src/glsl/standalone_scaffolding.cpp +++ b/src/glsl/standalone_scaffolding.cpp @@ -85,6 +85,14 @@ _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type) return shader; } +void +_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh) +{ + free((void *)sh->Source); + free(sh->Label); + ralloc_free(sh); +} + void _mesa_clear_shader_program_data(struct gl_shader_program *shProg) { diff --git a/src/glsl/standalone_scaffolding.h 
b/src/glsl/standalone_scaffolding.h index dc6fb640f15..a9ca5e4e3d3 100644 --- a/src/glsl/standalone_scaffolding.h +++ b/src/glsl/standalone_scaffolding.h @@ -44,6 +44,9 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, extern "C" struct gl_shader * _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type); +extern "C" void +_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh); + extern "C" void _mesa_clear_shader_program_data(struct gl_shader_program *); diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 88f37273e1e..a1cdfedd04f 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -785,7 +785,6 @@ struct dd_function_table { /*@{*/ struct gl_shader *(*NewShader)(struct gl_context *ctx, GLuint name, GLenum type); - void (*DeleteShader)(struct gl_context *ctx, struct gl_shader *shader); struct gl_shader_program *(*NewShaderProgram)(GLuint name); void (*DeleteShaderProgram)(struct gl_context *ctx, struct gl_shader_program *shProg); diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index 5cd37d7e4c4..90600fffc4b 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -74,7 +74,7 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, if (deleteFlag) { if (old->Name != 0) _mesa_HashRemove(ctx->Shared->ShaderObjects, old->Name); - ctx->Driver.DeleteShader(ctx, old); + _mesa_delete_shader(ctx, old); } *ptr = NULL; @@ -116,9 +116,8 @@ _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type) /** * Delete a shader object. - * Called via ctx->Driver.DeleteShader(). 
*/ -static void +void _mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh) { free((void *)sh->Source); @@ -362,7 +361,7 @@ _mesa_free_shader_program_data(struct gl_context *ctx, for (sh = 0; sh < MESA_SHADER_STAGES; sh++) { if (shProg->_LinkedShaders[sh] != NULL) { - ctx->Driver.DeleteShader(ctx, shProg->_LinkedShaders[sh]); + _mesa_delete_shader(ctx, shProg->_LinkedShaders[sh]); shProg->_LinkedShaders[sh] = NULL; } } @@ -439,7 +438,6 @@ void _mesa_init_shader_object_functions(struct dd_function_table *driver) { driver->NewShader = _mesa_new_shader; - driver->DeleteShader = _mesa_delete_shader; driver->NewShaderProgram = _mesa_new_shader_program; driver->DeleteShaderProgram = _mesa_delete_shader_program; driver->LinkShader = _mesa_ir_link_shader; diff --git a/src/mesa/main/shaderobj.h b/src/mesa/main/shaderobj.h index 943044e37cd..8add6425a9a 100644 --- a/src/mesa/main/shaderobj.h +++ b/src/mesa/main/shaderobj.h @@ -82,6 +82,9 @@ _mesa_init_shader(struct gl_context *ctx, struct gl_shader *shader); extern struct gl_shader * _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type); +extern void +_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh); + extern struct gl_shader_program * _mesa_lookup_shader_program(struct gl_context *ctx, GLuint name); diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c index d5ac9f1fb13..7ab89d08584 100644 --- a/src/mesa/main/shared.c +++ b/src/mesa/main/shared.c @@ -219,7 +219,7 @@ delete_shader_cb(GLuint id, void *data, void *userData) struct gl_context *ctx = (struct gl_context *) userData; struct gl_shader *sh = (struct gl_shader *) data; if (_mesa_validate_shader_target(ctx, sh->Type)) { - ctx->Driver.DeleteShader(ctx, sh); + _mesa_delete_shader(ctx, sh); } else { struct gl_shader_program *shProg = (struct gl_shader_program *) data; -- cgit v1.2.3 From b37dcb8c18ec28f1ba0c4f69c4b9680a5db9c2fe Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: 
remove Driver.NewShaderProgram Nothing overrides it. Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/main/dd.h | 1 - src/mesa/main/ff_fragment_shader.cpp | 3 ++- src/mesa/main/shaderapi.c | 2 +- src/mesa/main/shaderobj.c | 4 +--- src/mesa/main/shaderobj.h | 3 +++ 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index a1cdfedd04f..56cfda7091c 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -785,7 +785,6 @@ struct dd_function_table { /*@{*/ struct gl_shader *(*NewShader)(struct gl_context *ctx, GLuint name, GLenum type); - struct gl_shader_program *(*NewShaderProgram)(GLuint name); void (*DeleteShaderProgram)(struct gl_context *ctx, struct gl_shader_program *shProg); void (*UseProgram)(struct gl_context *ctx, struct gl_shader_program *shProg); diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index c0030bc5687..e4e2a18c1da 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -32,6 +32,7 @@ #include "main/imports.h" #include "main/macros.h" #include "main/samplerobj.h" +#include "main/shaderobj.h" #include "main/texenvprogram.h" #include "main/texobj.h" #include "main/uniforms.h" @@ -1208,7 +1209,7 @@ create_new_program(struct gl_context *ctx, struct state_key *key) p.top_instructions = p.shader->ir; p.instructions = p.shader->ir; p.state = key; - p.shader_program = ctx->Driver.NewShaderProgram(0); + p.shader_program = _mesa_new_shader_program(0); /* Tell the linker to ignore the fact that we're building a * separate shader, in case we're in a GLES2 context that would diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 82a1ec37130..13fdf8c033b 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -320,7 +320,7 @@ create_shader_program(struct gl_context *ctx) name = _mesa_HashFindFreeKeyBlock(ctx->Shared->ShaderObjects, 1); - shProg = ctx->Driver.NewShaderProgram(name); 
+ shProg = _mesa_new_shader_program(name); _mesa_HashInsert(ctx->Shared->ShaderObjects, name, shProg); diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index 90600fffc4b..0b85f74bf42 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -245,9 +245,8 @@ init_shader_program(struct gl_shader_program *prog) /** * Allocate a new gl_shader_program object, initialize it. - * Called via ctx->Driver.NewShaderProgram() */ -static struct gl_shader_program * +struct gl_shader_program * _mesa_new_shader_program(GLuint name) { struct gl_shader_program *shProg; @@ -438,7 +437,6 @@ void _mesa_init_shader_object_functions(struct dd_function_table *driver) { driver->NewShader = _mesa_new_shader; - driver->NewShaderProgram = _mesa_new_shader_program; driver->DeleteShaderProgram = _mesa_delete_shader_program; driver->LinkShader = _mesa_ir_link_shader; } diff --git a/src/mesa/main/shaderobj.h b/src/mesa/main/shaderobj.h index 8add6425a9a..f40c7fcc5e6 100644 --- a/src/mesa/main/shaderobj.h +++ b/src/mesa/main/shaderobj.h @@ -92,6 +92,9 @@ extern struct gl_shader_program * _mesa_lookup_shader_program_err(struct gl_context *ctx, GLuint name, const char *caller); +extern struct gl_shader_program * +_mesa_new_shader_program(GLuint name); + extern void _mesa_clear_shader_program_data(struct gl_shader_program *shProg); -- cgit v1.2.3 From 6863d5b02a5d982cc71cfc28155a8958adfc65d1 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.DeleteShaderProgram Nothing overrides it. 
Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/main/dd.h | 2 -- src/mesa/main/shaderobj.c | 9 ++++----- src/mesa/main/shaderobj.h | 3 +++ src/mesa/main/shared.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 56cfda7091c..6873205375a 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -785,8 +785,6 @@ struct dd_function_table { /*@{*/ struct gl_shader *(*NewShader)(struct gl_context *ctx, GLuint name, GLenum type); - void (*DeleteShaderProgram)(struct gl_context *ctx, - struct gl_shader_program *shProg); void (*UseProgram)(struct gl_context *ctx, struct gl_shader_program *shProg); /*@}*/ diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index 0b85f74bf42..4e85fda24b4 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -209,7 +209,7 @@ _mesa_reference_shader_program_(struct gl_context *ctx, if (deleteFlag) { if (old->Name != 0) _mesa_HashRemove(ctx->Shared->ShaderObjects, old->Name); - ctx->Driver.DeleteShaderProgram(ctx, old); + _mesa_delete_shader_program(ctx, old); } *ptr = NULL; @@ -372,10 +372,10 @@ _mesa_free_shader_program_data(struct gl_context *ctx, /** * Free/delete a shader program object. - * Called via ctx->Driver.DeleteShaderProgram(). 
*/ -static void -_mesa_delete_shader_program(struct gl_context *ctx, struct gl_shader_program *shProg) +void +_mesa_delete_shader_program(struct gl_context *ctx, + struct gl_shader_program *shProg) { _mesa_free_shader_program_data(ctx, shProg); @@ -437,6 +437,5 @@ void _mesa_init_shader_object_functions(struct dd_function_table *driver) { driver->NewShader = _mesa_new_shader; - driver->DeleteShaderProgram = _mesa_delete_shader_program; driver->LinkShader = _mesa_ir_link_shader; } diff --git a/src/mesa/main/shaderobj.h b/src/mesa/main/shaderobj.h index f40c7fcc5e6..796de470735 100644 --- a/src/mesa/main/shaderobj.h +++ b/src/mesa/main/shaderobj.h @@ -102,6 +102,9 @@ extern void _mesa_free_shader_program_data(struct gl_context *ctx, struct gl_shader_program *shProg); +extern void +_mesa_delete_shader_program(struct gl_context *ctx, + struct gl_shader_program *shProg); extern void diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c index 7ab89d08584..1acaf59f432 100644 --- a/src/mesa/main/shared.c +++ b/src/mesa/main/shared.c @@ -224,7 +224,7 @@ delete_shader_cb(GLuint id, void *data, void *userData) else { struct gl_shader_program *shProg = (struct gl_shader_program *) data; assert(shProg->Type == GL_SHADER_PROGRAM_MESA); - ctx->Driver.DeleteShaderProgram(ctx, shProg); + _mesa_delete_shader_program(ctx, shProg); } } -- cgit v1.2.3 From a4fca2448422f52508cf7c7948102299c2db63d6 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.ResizeBuffers Nothing overrides it. 
Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/drivers/common/driverfuncs.c | 1 - src/mesa/drivers/dri/common/dri_util.c | 3 ++- src/mesa/main/dd.h | 7 ------- src/mesa/main/framebuffer.c | 3 +-- 4 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 6fe42b1775c..d6195513fe9 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -75,7 +75,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->GetString = NULL; /* REQUIRED! */ driver->UpdateState = NULL; /* REQUIRED! */ - driver->ResizeBuffers = _mesa_resize_framebuffer; driver->Finish = NULL; driver->Flush = NULL; diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index d35ac263a45..5cfa2f8ca4f 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -44,6 +44,7 @@ #include "utils.h" #include "xmlpool.h" #include "main/mtypes.h" +#include "main/framebuffer.h" #include "main/version.h" #include "main/errors.h" #include "main/macros.h" @@ -793,7 +794,7 @@ driUpdateFramebufferSize(struct gl_context *ctx, const __DRIdrawable *dPriv) { struct gl_framebuffer *fb = (struct gl_framebuffer *) dPriv->driverPrivate; if (fb && (dPriv->w != fb->Width || dPriv->h != fb->Height)) { - ctx->Driver.ResizeBuffers(ctx, fb, dPriv->w, dPriv->h); + _mesa_resize_framebuffer(ctx, fb, dPriv->w, dPriv->h); /* if the driver needs the hw lock for ResizeBuffers, the drawable might have changed again by now */ assert(fb->Width == dPriv->w); diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 6873205375a..1cccace12cb 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -93,13 +93,6 @@ struct dd_function_table { */ void (*UpdateState)( struct gl_context *ctx, GLbitfield new_state ); - /** - * Resize the given framebuffer to the given size. 
- * XXX OBSOLETE: this function will be removed in the future. - */ - void (*ResizeBuffers)( struct gl_context *ctx, struct gl_framebuffer *fb, - GLuint width, GLuint height); - /** * This is called whenever glFinish() is called. */ diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 37e2c29c89c..5b6b3f64581 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -271,8 +271,7 @@ _mesa_reference_framebuffer_(struct gl_framebuffer **ptr, * Resize the given framebuffer's renderbuffers to the new width and height. * This should only be used for window-system framebuffers, not * user-created renderbuffers (i.e. made with GL_EXT_framebuffer_object). - * This will typically be called via ctx->Driver.ResizeBuffers() or directly - * from a device driver. + * This will typically be called directly from a device driver. * * \note it's possible for ctx to be null since a window can be resized * without a currently bound rendering context. -- cgit v1.2.3 From a6cc895e935889fce674e3988f326a2bae3b51ce Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.Accum Nothing calls it. 
Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/drivers/common/driverfuncs.c | 1 - src/mesa/drivers/dri/i915/intel_pixel.c | 1 - src/mesa/drivers/dri/i965/intel_pixel.c | 1 - src/mesa/main/dd.h | 6 ------ src/mesa/state_tracker/st_context.c | 2 -- 5 files changed, 11 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index d6195513fe9..55e2cfa304c 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -81,7 +81,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) /* framebuffer/image functions */ driver->Clear = _swrast_Clear; - driver->Accum = _mesa_accum; driver->RasterPos = _tnl_RasterPos; driver->DrawPixels = _swrast_DrawPixels; driver->ReadPixels = _mesa_readpixels; diff --git a/src/mesa/drivers/dri/i915/intel_pixel.c b/src/mesa/drivers/dri/i915/intel_pixel.c index 6f139e19e11..feb1a3f97e8 100644 --- a/src/mesa/drivers/dri/i915/intel_pixel.c +++ b/src/mesa/drivers/dri/i915/intel_pixel.c @@ -126,7 +126,6 @@ intel_check_blit_fragment_ops(struct gl_context * ctx, bool src_alpha_is_one) void intelInitPixelFuncs(struct dd_function_table *functions) { - functions->Accum = _mesa_accum; functions->Bitmap = intelBitmap; functions->CopyPixels = intelCopyPixels; functions->DrawPixels = intelDrawPixels; diff --git a/src/mesa/drivers/dri/i965/intel_pixel.c b/src/mesa/drivers/dri/i965/intel_pixel.c index 30d3a521ec8..d4f86fdffe0 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel.c +++ b/src/mesa/drivers/dri/i965/intel_pixel.c @@ -128,7 +128,6 @@ intel_check_blit_fragment_ops(struct gl_context * ctx, bool src_alpha_is_one) void intelInitPixelFuncs(struct dd_function_table *functions) { - functions->Accum = _mesa_accum; functions->Bitmap = intelBitmap; functions->CopyPixels = intelCopyPixels; functions->DrawPixels = intelDrawPixels; diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 1cccace12cb..04d6935c73a 100644 --- a/src/mesa/main/dd.h +++ 
b/src/mesa/main/dd.h @@ -110,12 +110,6 @@ struct dd_function_table { */ void (*Clear)( struct gl_context *ctx, GLbitfield buffers ); - /** - * Execute glAccum command. - */ - void (*Accum)( struct gl_context *ctx, GLenum op, GLfloat value ); - - /** * Execute glRasterPos, updating the ctx->Current.Raster fields */ diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 72c23cad4bc..f65aafa9d55 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -410,8 +410,6 @@ void st_init_driver_functions(struct pipe_screen *screen, _mesa_init_shader_object_functions(functions); _mesa_init_sampler_object_functions(functions); - functions->Accum = _mesa_accum; - st_init_blit_functions(functions); st_init_bufferobject_functions(functions); st_init_clear_functions(functions); -- cgit v1.2.3 From 379255298f8fa9d9d8d53bf8898345fdd4fd0222 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove some Driver.Blend* hooks Nothing sets them. 
Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/main/blend.c | 11 ----------- src/mesa/main/dd.h | 5 ----- 2 files changed, 16 deletions(-) diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c index 4fc32962425..1638417d126 100644 --- a/src/mesa/main/blend.c +++ b/src/mesa/main/blend.c @@ -303,11 +303,6 @@ _mesa_BlendFuncSeparateiARB(GLuint buf, GLenum sfactorRGB, GLenum dfactorRGB, ctx->Color.Blend[buf].DstA = dfactorA; update_uses_dual_src(ctx, buf); ctx->Color._BlendFuncPerBuffer = GL_TRUE; - - if (ctx->Driver.BlendFuncSeparatei) { - ctx->Driver.BlendFuncSeparatei(ctx, buf, sfactorRGB, dfactorRGB, - sfactorA, dfactorA); - } } @@ -406,9 +401,6 @@ _mesa_BlendEquationiARB(GLuint buf, GLenum mode) ctx->Color.Blend[buf].EquationRGB = mode; ctx->Color.Blend[buf].EquationA = mode; ctx->Color._BlendEquationPerBuffer = GL_TRUE; - - if (ctx->Driver.BlendEquationSeparatei) - ctx->Driver.BlendEquationSeparatei(ctx, buf, mode, mode); } @@ -503,9 +495,6 @@ _mesa_BlendEquationSeparateiARB(GLuint buf, GLenum modeRGB, GLenum modeA) ctx->Color.Blend[buf].EquationRGB = modeRGB; ctx->Color.Blend[buf].EquationA = modeA; ctx->Color._BlendEquationPerBuffer = GL_TRUE; - - if (ctx->Driver.BlendEquationSeparatei) - ctx->Driver.BlendEquationSeparatei(ctx, buf, modeRGB, modeA); } diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 04d6935c73a..ece2b35a0ca 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -516,15 +516,10 @@ struct dd_function_table { /** Set the blend equation */ void (*BlendEquationSeparate)(struct gl_context *ctx, GLenum modeRGB, GLenum modeA); - void (*BlendEquationSeparatei)(struct gl_context *ctx, GLuint buffer, - GLenum modeRGB, GLenum modeA); /** Specify pixel arithmetic */ void (*BlendFuncSeparate)(struct gl_context *ctx, GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorA, GLenum dfactorA); - void (*BlendFuncSeparatei)(struct gl_context *ctx, GLuint buffer, - GLenum sfactorRGB, GLenum dfactorRGB, - GLenum sfactorA, GLenum 
dfactorA); /** Specify a plane against which all geometry is clipped */ void (*ClipPlane)(struct gl_context *ctx, GLenum plane, const GLfloat *eq); /** Enable and disable writing of frame buffer color components */ -- cgit v1.2.3 From 8de82faf95e2f4713370876fecba621a9efe87e7 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.ColorMaskIndexed Nothing sets it. Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/drivers/common/driverfuncs.c | 22 +++++----------------- src/mesa/main/blend.c | 3 --- src/mesa/main/dd.h | 2 -- 3 files changed, 5 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 55e2cfa304c..44a86537bb9 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -240,23 +240,11 @@ _mesa_init_driver_state(struct gl_context *ctx) ctx->Color.Blend[0].SrcA, ctx->Color.Blend[0].DstA); - if (ctx->Driver.ColorMaskIndexed) { - GLuint i; - for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { - ctx->Driver.ColorMaskIndexed(ctx, i, - ctx->Color.ColorMask[i][RCOMP], - ctx->Color.ColorMask[i][GCOMP], - ctx->Color.ColorMask[i][BCOMP], - ctx->Color.ColorMask[i][ACOMP]); - } - } - else { - ctx->Driver.ColorMask(ctx, - ctx->Color.ColorMask[0][RCOMP], - ctx->Color.ColorMask[0][GCOMP], - ctx->Color.ColorMask[0][BCOMP], - ctx->Color.ColorMask[0][ACOMP]); - } + ctx->Driver.ColorMask(ctx, + ctx->Color.ColorMask[0][RCOMP], + ctx->Color.ColorMask[0][GCOMP], + ctx->Color.ColorMask[0][BCOMP], + ctx->Color.ColorMask[0][ACOMP]); ctx->Driver.CullFace(ctx, ctx->Polygon.CullFaceMode); ctx->Driver.DepthFunc(ctx, ctx->Depth.Func); diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c index 1638417d126..dee5e29d5b8 100644 --- a/src/mesa/main/blend.c +++ b/src/mesa/main/blend.c @@ -734,9 +734,6 @@ _mesa_ColorMaski( GLuint buf, GLboolean red, GLboolean green, FLUSH_VERTICES(ctx, _NEW_COLOR); 
COPY_4UBV(ctx->Color.ColorMask[buf], tmp); - - if (ctx->Driver.ColorMaskIndexed) - ctx->Driver.ColorMaskIndexed(ctx, buf, red, green, blue, alpha); } diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index ece2b35a0ca..1ab27c9c332 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -525,8 +525,6 @@ struct dd_function_table { /** Enable and disable writing of frame buffer color components */ void (*ColorMask)(struct gl_context *ctx, GLboolean rmask, GLboolean gmask, GLboolean bmask, GLboolean amask ); - void (*ColorMaskIndexed)(struct gl_context *ctx, GLuint buf, GLboolean rmask, - GLboolean gmask, GLboolean bmask, GLboolean amask); /** Cause a material color to track the current color */ void (*ColorMaterial)(struct gl_context *ctx, GLenum face, GLenum mode); /** Specify whether front- or back-facing facets can be culled */ -- cgit v1.2.3 From 1044f99812bb29fa06c83c7230fe80f867711266 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.Hint Nothing sets it. 
Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/drivers/common/driverfuncs.c | 1 - src/mesa/drivers/dri/r200/r200_state.c | 1 - src/mesa/drivers/dri/radeon/radeon_state.c | 1 - src/mesa/main/dd.h | 2 -- src/mesa/main/hint.c | 5 ----- 5 files changed, 10 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 44a86537bb9..915ac57501f 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -133,7 +133,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->DepthRange = NULL; driver->Enable = NULL; driver->Fogfv = NULL; - driver->Hint = NULL; driver->Lightfv = NULL; driver->LightModelfv = NULL; driver->LineStipple = NULL; diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index cca176d7f9b..3038c634aff 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -2389,7 +2389,6 @@ void r200InitStateFuncs( radeonContextPtr radeon, struct dd_function_table *func functions->Enable = r200Enable; functions->Fogfv = r200Fogfv; functions->FrontFace = r200FrontFace; - functions->Hint = NULL; functions->LightModelfv = r200LightModelfv; functions->Lightfv = r200Lightfv; functions->LineStipple = r200LineStipple; diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index 74c1fc6c902..8a1b81d8f32 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -2148,7 +2148,6 @@ void radeonInitStateFuncs( struct gl_context *ctx ) ctx->Driver.Enable = radeonEnable; ctx->Driver.Fogfv = radeonFogfv; ctx->Driver.FrontFace = radeonFrontFace; - ctx->Driver.Hint = NULL; ctx->Driver.LightModelfv = radeonLightModelfv; ctx->Driver.Lightfv = radeonLightfv; ctx->Driver.LineStipple = radeonLineStipple; diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 1ab27c9c332..dfcc206201f 100644 --- 
a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -545,8 +545,6 @@ struct dd_function_table { void (*Enable)(struct gl_context *ctx, GLenum cap, GLboolean state); /** Specify fog parameters */ void (*Fogfv)(struct gl_context *ctx, GLenum pname, const GLfloat *params); - /** Specify implementation-specific hints */ - void (*Hint)(struct gl_context *ctx, GLenum target, GLenum mode); /** Set light source parameters. * Note: for GL_POSITION and GL_SPOT_DIRECTION, params will have already * been transformed to eye-space. diff --git a/src/mesa/main/hint.c b/src/mesa/main/hint.c index 984239a7276..5d0c15d35ab 100644 --- a/src/mesa/main/hint.c +++ b/src/mesa/main/hint.c @@ -123,11 +123,6 @@ _mesa_Hint( GLenum target, GLenum mode ) default: goto invalid_target; } - - if (ctx->Driver.Hint) { - (*ctx->Driver.Hint)( ctx, target, mode ); - } - return; invalid_target: -- cgit v1.2.3 From 7401807e8dd89f79a98b89cc1bfce5ed89166653 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.NewArrayObject Nothing reimplements it. 
Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/drivers/common/driverfuncs.c | 1 - src/mesa/main/arrayobj.c | 5 ++--- src/mesa/main/dd.h | 1 - src/mesa/main/varray.c | 2 +- src/mesa/state_tracker/st_cb_bufferobjects.c | 1 - 5 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 915ac57501f..85b53f40313 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -178,7 +178,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) _mesa_init_texture_barrier_functions(driver); /* APPLE_vertex_array_object */ - driver->NewArrayObject = _mesa_new_vao; driver->DeleteArrayObject = _mesa_delete_vao; driver->BindArrayObject = NULL; diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c index 28851434133..dde489e85fe 100644 --- a/src/mesa/main/arrayobj.c +++ b/src/mesa/main/arrayobj.c @@ -151,7 +151,6 @@ unbind_array_object_vbos(struct gl_context *ctx, struct gl_vertex_array_object * * Allocate and initialize a new vertex array object. * * This function is intended to be called via - * \c dd_function_table::NewArrayObject. 
*/ struct gl_vertex_array_object * _mesa_new_vao(struct gl_context *ctx, GLuint name) @@ -408,7 +407,7 @@ bind_vertex_array(struct gl_context *ctx, GLuint id, GLboolean genRequired) } /* For APPLE version, generate a new array object now */ - newObj = (*ctx->Driver.NewArrayObject)(ctx, id); + newObj = _mesa_new_vao(ctx, id); if (!newObj) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindVertexArrayAPPLE"); return; @@ -565,7 +564,7 @@ gen_vertex_arrays(struct gl_context *ctx, GLsizei n, GLuint *arrays, struct gl_vertex_array_object *obj; GLuint name = first + i; - obj = (*ctx->Driver.NewArrayObject)( ctx, name ); + obj = _mesa_new_vao(ctx, name); if (!obj) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func); return; diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index dfcc206201f..503ef4be27d 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -752,7 +752,6 @@ struct dd_function_table { * \name Vertex Array objects */ /*@{*/ - struct gl_vertex_array_object * (*NewArrayObject)(struct gl_context *ctx, GLuint id); void (*DeleteArrayObject)(struct gl_context *ctx, struct gl_vertex_array_object *); void (*BindArrayObject)(struct gl_context *ctx, struct gl_vertex_array_object *); /*@}*/ diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c index 4df57c148c7..887d0c03a50 100644 --- a/src/mesa/main/varray.c +++ b/src/mesa/main/varray.c @@ -2354,7 +2354,7 @@ _mesa_print_arrays(struct gl_context *ctx) void _mesa_init_varray(struct gl_context *ctx) { - ctx->Array.DefaultVAO = ctx->Driver.NewArrayObject(ctx, 0); + ctx->Array.DefaultVAO = _mesa_new_vao(ctx, 0); _mesa_reference_vao(ctx, &ctx->Array.VAO, ctx->Array.DefaultVAO); ctx->Array.ActiveTexture = 0; /* GL_ARB_multitexture */ diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index db254c2144e..bcfac980cb4 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -529,6 +529,5 @@ 
st_init_bufferobject_functions(struct dd_function_table *functions) functions->ClearBufferSubData = st_clear_buffer_subdata; /* For GL_APPLE_vertex_array_object */ - functions->NewArrayObject = _mesa_new_vao; functions->DeleteArrayObject = _mesa_delete_vao; } -- cgit v1.2.3 From d1269a844f4d4b498c0b6c886e6273a7da93582e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.DeleteArrayObject Nothing reimplements it. Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/drivers/common/driverfuncs.c | 1 - src/mesa/main/arrayobj.c | 6 ++---- src/mesa/main/dd.h | 1 - src/mesa/state_tracker/st_cb_bufferobjects.c | 3 --- 4 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 85b53f40313..84d74dfda8a 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -178,7 +178,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) _mesa_init_texture_barrier_functions(driver); /* APPLE_vertex_array_object */ - driver->DeleteArrayObject = _mesa_delete_vao; driver->BindArrayObject = NULL; _mesa_init_shader_object_functions(driver); diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c index dde489e85fe..f7272214000 100644 --- a/src/mesa/main/arrayobj.c +++ b/src/mesa/main/arrayobj.c @@ -202,10 +202,8 @@ _mesa_reference_vao_(struct gl_context *ctx, deleteFlag = (oldObj->RefCount == 0); mtx_unlock(&oldObj->Mutex); - if (deleteFlag) { - assert(ctx->Driver.DeleteArrayObject); - ctx->Driver.DeleteArrayObject(ctx, oldObj); - } + if (deleteFlag) + _mesa_delete_vao(ctx, oldObj); *ptr = NULL; } diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 503ef4be27d..bbd26dee374 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -752,7 +752,6 @@ struct dd_function_table { * \name Vertex Array objects */ /*@{*/ - void (*DeleteArrayObject)(struct gl_context *ctx, struct 
gl_vertex_array_object *); void (*BindArrayObject)(struct gl_context *ctx, struct gl_vertex_array_object *); /*@}*/ diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index bcfac980cb4..8afd336779f 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -527,7 +527,4 @@ st_init_bufferobject_functions(struct dd_function_table *functions) functions->UnmapBuffer = st_bufferobj_unmap; functions->CopyBufferSubData = st_copy_buffer_subdata; functions->ClearBufferSubData = st_clear_buffer_subdata; - - /* For GL_APPLE_vertex_array_object */ - functions->DeleteArrayObject = _mesa_delete_vao; } -- cgit v1.2.3 From 82a950f18799d2ec6793892f469234b87e28bf02 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.BindArrayObject Nothing sets it. Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/drivers/common/driverfuncs.c | 6 ------ src/mesa/main/arrayobj.c | 4 ---- src/mesa/main/dd.h | 8 -------- 3 files changed, 18 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 84d74dfda8a..da70dfdc0f4 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -176,14 +176,8 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->DiscardFramebuffer = NULL; _mesa_init_texture_barrier_functions(driver); - - /* APPLE_vertex_array_object */ - driver->BindArrayObject = NULL; - _mesa_init_shader_object_functions(driver); - _mesa_init_transform_feedback_functions(driver); - _mesa_init_sampler_object_functions(driver); /* T&L stuff */ diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c index f7272214000..061e557a397 100644 --- a/src/mesa/main/arrayobj.c +++ b/src/mesa/main/arrayobj.c @@ -443,10 +443,6 @@ bind_vertex_array(struct gl_context *ctx, GLuint id, GLboolean genRequired) ctx->NewState |= 
_NEW_ARRAY; _mesa_reference_vao(ctx, &ctx->Array.VAO, newObj); - - /* Pass BindVertexArray call to device driver */ - if (ctx->Driver.BindArrayObject && newObj) - ctx->Driver.BindArrayObject(ctx, newObj); } diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index bbd26dee374..9c6462f87b6 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -747,14 +747,6 @@ struct dd_function_table { GLint *bytesWritten); /*@}*/ - - /** - * \name Vertex Array objects - */ - /*@{*/ - void (*BindArrayObject)(struct gl_context *ctx, struct gl_vertex_array_object *); - /*@}*/ - /** * \name GLSL-related functions (ARB extensions and OpenGL 2.x) */ -- cgit v1.2.3 From 91799880b38e889633f136ddd72b4bcbcee150a9 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.BeginVertices Nothing overrides it. Reviewed-by: Brian Paul --- src/mesa/main/dd.h | 5 ----- src/mesa/vbo/vbo_exec.c | 1 - src/mesa/vbo/vbo_exec_api.c | 4 ++-- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 9c6462f87b6..62cbf43c733 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -792,11 +792,6 @@ struct dd_function_table { /** Need to call SaveFlushVertices() upon state change? */ GLboolean SaveNeedFlush; - /* Called prior to any of the GLvertexformat functions being - * called. Paired with Driver.FlushVertices(). - */ - void (*BeginVertices)( struct gl_context *ctx ); - /** * If inside glBegin()/glEnd(), it should assert(0). 
Otherwise, if * FLUSH_STORED_VERTICES bit in \p flags is set flushes any buffered diff --git a/src/mesa/vbo/vbo_exec.c b/src/mesa/vbo/vbo_exec.c index eb903504328..18fb88cf19a 100644 --- a/src/mesa/vbo/vbo_exec.c +++ b/src/mesa/vbo/vbo_exec.c @@ -50,7 +50,6 @@ void vbo_exec_init( struct gl_context *ctx ) ctx->Driver.NeedFlush = 0; ctx->Driver.CurrentExecPrimitive = PRIM_OUTSIDE_BEGIN_END; - ctx->Driver.BeginVertices = vbo_exec_BeginVertices; ctx->Driver.FlushVertices = vbo_exec_FlushVertices; vbo_exec_invalidate_state( ctx, ~0 ); diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index 138cd60513d..48555892de2 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -419,7 +419,7 @@ do { \ struct vbo_exec_context *exec = &vbo_context(ctx)->exec; \ int sz = (sizeof(C) / sizeof(GLfloat)); \ if (unlikely(!(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT))) \ - ctx->Driver.BeginVertices( ctx ); \ + vbo_exec_BeginVertices(ctx); \ \ if (unlikely(exec->vtx.active_sz[A] != N * sz) || \ unlikely(exec->vtx.attrtype[A] != T)) \ @@ -1190,7 +1190,7 @@ void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags ) /* Flush (draw), and make sure VBO is left unmapped when done */ vbo_exec_FlushVertices_internal(exec, GL_TRUE); - /* Need to do this to ensure BeginVertices gets called again: + /* Need to do this to ensure vbo_exec_BeginVertices gets called again: */ ctx->Driver.NeedFlush &= ~(FLUSH_UPDATE_CURRENT | flags); -- cgit v1.2.3 From 72a5dff9cbc9ec9edee9e9ef539e4cb3f9051903 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.FlushVertices Nothing overrides it. 
Reviewed-by: Brian Paul --- src/mesa/drivers/common/driverfuncs.c | 1 - src/mesa/main/context.h | 5 +++-- src/mesa/main/dd.h | 10 ---------- src/mesa/vbo/vbo.h | 3 +++ src/mesa/vbo/vbo_exec.c | 1 - src/mesa/vbo/vbo_exec.h | 1 - src/mesa/vbo/vbo_exec_api.c | 9 ++++++++- 7 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index da70dfdc0f4..f34f7ff144e 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -187,7 +187,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->SaveNeedFlush = 0; driver->ProgramStringNotify = _tnl_program_string; - driver->FlushVertices = NULL; driver->SaveFlushVertices = NULL; driver->NotifySaveBegin = NULL; driver->LightingSpaceChange = NULL; diff --git a/src/mesa/main/context.h b/src/mesa/main/context.h index 0f7529ad975..1e7a12c8a84 100644 --- a/src/mesa/main/context.h +++ b/src/mesa/main/context.h @@ -51,6 +51,7 @@ #include "imports.h" #include "mtypes.h" +#include "vbo/vbo.h" #ifdef __cplusplus @@ -227,7 +228,7 @@ do { \ if (MESA_VERBOSE & VERBOSE_STATE) \ _mesa_debug(ctx, "FLUSH_VERTICES in %s\n", MESA_FUNCTION);\ if (ctx->Driver.NeedFlush & FLUSH_STORED_VERTICES) \ - ctx->Driver.FlushVertices(ctx, FLUSH_STORED_VERTICES); \ + vbo_exec_FlushVertices(ctx, FLUSH_STORED_VERTICES); \ ctx->NewState |= newstate; \ } while (0) @@ -246,7 +247,7 @@ do { \ if (MESA_VERBOSE & VERBOSE_STATE) \ _mesa_debug(ctx, "FLUSH_CURRENT in %s\n", MESA_FUNCTION); \ if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) \ - ctx->Driver.FlushVertices(ctx, FLUSH_UPDATE_CURRENT); \ + vbo_exec_FlushVertices(ctx, FLUSH_UPDATE_CURRENT); \ ctx->NewState |= newstate; \ } while (0) diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 62cbf43c733..69183836500 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -792,16 +792,6 @@ struct dd_function_table { /** Need to call SaveFlushVertices() upon state change? 
*/ GLboolean SaveNeedFlush; - /** - * If inside glBegin()/glEnd(), it should assert(0). Otherwise, if - * FLUSH_STORED_VERTICES bit in \p flags is set flushes any buffered - * vertices, if FLUSH_UPDATE_CURRENT bit is set updates - * __struct gl_contextRec::Current and gl_light_attrib::Material - * - * Note that the default T&L engine never clears the - * FLUSH_UPDATE_CURRENT bit, even after performing the update. - */ - void (*FlushVertices)( struct gl_context *ctx, GLuint flags ); void (*SaveFlushVertices)( struct gl_context *ctx ); /** diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index 2aaff5df019..57ab2acc18a 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -88,6 +88,9 @@ void vbo_initialize_save_dispatch(const struct gl_context *ctx, struct _glapi_table *exec); +void vbo_exec_FlushVertices(struct gl_context *ctx, GLuint flags); + + typedef void (*vbo_draw_func)( struct gl_context *ctx, const struct _mesa_prim *prims, diff --git a/src/mesa/vbo/vbo_exec.c b/src/mesa/vbo/vbo_exec.c index 18fb88cf19a..a301c6c9a22 100644 --- a/src/mesa/vbo/vbo_exec.c +++ b/src/mesa/vbo/vbo_exec.c @@ -50,7 +50,6 @@ void vbo_exec_init( struct gl_context *ctx ) ctx->Driver.NeedFlush = 0; ctx->Driver.CurrentExecPrimitive = PRIM_OUTSIDE_BEGIN_END; - ctx->Driver.FlushVertices = vbo_exec_FlushVertices; vbo_exec_invalidate_state( ctx, ~0 ); } diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h index f17fe684cc3..80f3015925d 100644 --- a/src/mesa/vbo/vbo_exec.h +++ b/src/mesa/vbo/vbo_exec.h @@ -148,7 +148,6 @@ void vbo_exec_destroy( struct gl_context *ctx ); void vbo_exec_invalidate_state( struct gl_context *ctx, GLuint new_state ); void vbo_exec_BeginVertices( struct gl_context *ctx ); -void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags ); /* Internal functions: diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index 48555892de2..583a2f9b79f 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -1165,7 
+1165,14 @@ void vbo_exec_BeginVertices( struct gl_context *ctx ) /** - * Called via ctx->Driver.FlushVertices() + * If inside glBegin()/glEnd(), it should assert(0). Otherwise, if + * FLUSH_STORED_VERTICES bit in \p flags is set flushes any buffered + * vertices, if FLUSH_UPDATE_CURRENT bit is set updates + * __struct gl_contextRec::Current and gl_light_attrib::Material + * + * Note that the default T&L engine never clears the + * FLUSH_UPDATE_CURRENT bit, even after performing the update. + * * \param flags bitmask of FLUSH_STORED_VERTICES, FLUSH_UPDATE_CURRENT */ void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags ) -- cgit v1.2.3 From 4b8bb2f559b75c953e87c7b6bda17b155a87df15 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.SaveFlushVertices Nothing overrides it. Reviewed-by: Brian Paul --- src/mesa/drivers/common/driverfuncs.c | 1 - src/mesa/main/dd.h | 4 +--- src/mesa/main/dlist.c | 5 ++--- src/mesa/vbo/vbo.h | 1 + src/mesa/vbo/vbo_save.c | 1 - src/mesa/vbo/vbo_save.h | 1 - src/mesa/vbo/vbo_save_api.c | 2 +- 7 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index f34f7ff144e..571c631a7da 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -187,7 +187,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->SaveNeedFlush = 0; driver->ProgramStringNotify = _tnl_program_string; - driver->SaveFlushVertices = NULL; driver->NotifySaveBegin = NULL; driver->LightingSpaceChange = NULL; diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 69183836500..2ae2eecf5dc 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -789,11 +789,9 @@ struct dd_function_table { */ GLbitfield NeedFlush; - /** Need to call SaveFlushVertices() upon state change? */ + /** Need to call vbo_save_SaveFlushVertices() upon state change? 
*/ GLboolean SaveNeedFlush; - void (*SaveFlushVertices)( struct gl_context *ctx ); - /** * Give the driver the opportunity to hook in its own vtxfmt for * compiling optimized display lists. This is called on each valid diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index 5554738d1a3..944be2505b4 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -105,13 +105,12 @@ struct gl_list_extensions * \param ctx GL context. * * Checks if dd_function_table::SaveNeedFlush is marked to flush - * stored (save) vertices, and calls - * dd_function_table::SaveFlushVertices if so. + * stored (save) vertices, and calls vbo_save_SaveFlushVertices if so. */ #define SAVE_FLUSH_VERTICES(ctx) \ do { \ if (ctx->Driver.SaveNeedFlush) \ - ctx->Driver.SaveFlushVertices(ctx); \ + vbo_save_SaveFlushVertices(ctx); \ } while (0) diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index 57ab2acc18a..c1f92368a27 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -89,6 +89,7 @@ vbo_initialize_save_dispatch(const struct gl_context *ctx, struct _glapi_table *exec); void vbo_exec_FlushVertices(struct gl_context *ctx, GLuint flags); +void vbo_save_SaveFlushVertices(struct gl_context *ctx); diff --git a/src/mesa/vbo/vbo_save.c b/src/mesa/vbo/vbo_save.c index a177660c0f2..7de1966f9c9 100644 --- a/src/mesa/vbo/vbo_save.c +++ b/src/mesa/vbo/vbo_save.c @@ -37,7 +37,6 @@ static void vbo_save_callback_init( struct gl_context *ctx ) { ctx->Driver.NewList = vbo_save_NewList; ctx->Driver.EndList = vbo_save_EndList; - ctx->Driver.SaveFlushVertices = vbo_save_SaveFlushVertices; ctx->Driver.BeginCallList = vbo_save_BeginCallList; ctx->Driver.EndCallList = vbo_save_EndCallList; ctx->Driver.NotifySaveBegin = vbo_save_NotifyBegin; diff --git a/src/mesa/vbo/vbo_save.h b/src/mesa/vbo/vbo_save.h index 5b1ac81771e..465c314a930 100644 --- a/src/mesa/vbo/vbo_save.h +++ b/src/mesa/vbo/vbo_save.h @@ -179,7 +179,6 @@ void vbo_save_EndList( struct gl_context *ctx ); void vbo_save_NewList( 
struct gl_context *ctx, GLuint list, GLenum mode ); void vbo_save_EndCallList( struct gl_context *ctx ); void vbo_save_BeginCallList( struct gl_context *ctx, struct gl_display_list *list ); -void vbo_save_SaveFlushVertices( struct gl_context *ctx ); GLboolean vbo_save_NotifyBegin( struct gl_context *ctx, GLenum mode ); void vbo_save_playback_vertex_list( struct gl_context *ctx, void *data ); diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c index 29de3d38aaa..36cc11760f1 100644 --- a/src/mesa/vbo/vbo_save_api.c +++ b/src/mesa/vbo/vbo_save_api.c @@ -1001,7 +1001,7 @@ vbo_save_NotifyBegin(struct gl_context *ctx, GLenum mode) _mesa_install_save_vtxfmt(ctx, &save->vtxfmt); } - /* We need to call SaveFlushVertices() if there's state change */ + /* We need to call vbo_save_SaveFlushVertices() if there's state change */ ctx->Driver.SaveNeedFlush = GL_TRUE; /* GL_TRUE means we've handled this glBegin here; don't compile a BEGIN -- cgit v1.2.3 From 7a5493972881cb6beb2e172f4159b39809ab3295 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.NotifySaveBegin Nothing overrides it. 
Reviewed-by: Brian Paul --- src/mesa/drivers/common/driverfuncs.c | 1 - src/mesa/main/dd.h | 7 ------- src/mesa/main/dlist.c | 2 +- src/mesa/vbo/vbo.h | 2 +- src/mesa/vbo/vbo_save.c | 1 - src/mesa/vbo/vbo_save.h | 1 - src/mesa/vbo/vbo_save_api.c | 5 +---- 7 files changed, 3 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 571c631a7da..1d6246a8281 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -187,7 +187,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->SaveNeedFlush = 0; driver->ProgramStringNotify = _tnl_program_string; - driver->NotifySaveBegin = NULL; driver->LightingSpaceChange = NULL; /* display list */ diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 2ae2eecf5dc..43dee9ae7d6 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -792,13 +792,6 @@ struct dd_function_table { /** Need to call vbo_save_SaveFlushVertices() upon state change? */ GLboolean SaveNeedFlush; - /** - * Give the driver the opportunity to hook in its own vtxfmt for - * compiling optimized display lists. This is called on each valid - * glBegin() during list compilation. - */ - GLboolean (*NotifySaveBegin)( struct gl_context *ctx, GLenum mode ); - /** * Notify driver that the special derived value _NeedEyeCoords has * changed. diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index 944be2505b4..f63efa1acc1 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -5465,7 +5465,7 @@ save_Begin(GLenum mode) /* Give the driver an opportunity to hook in an optimized * display list compiler. 
*/ - if (ctx->Driver.NotifySaveBegin(ctx, mode)) + if (vbo_save_NotifyBegin(ctx, mode)) return; SAVE_FLUSH_VERTICES(ctx); diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index c1f92368a27..c316a09f9ec 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -90,7 +90,7 @@ vbo_initialize_save_dispatch(const struct gl_context *ctx, void vbo_exec_FlushVertices(struct gl_context *ctx, GLuint flags); void vbo_save_SaveFlushVertices(struct gl_context *ctx); - +GLboolean vbo_save_NotifyBegin(struct gl_context *ctx, GLenum mode); typedef void (*vbo_draw_func)( struct gl_context *ctx, diff --git a/src/mesa/vbo/vbo_save.c b/src/mesa/vbo/vbo_save.c index 7de1966f9c9..bee6634cfec 100644 --- a/src/mesa/vbo/vbo_save.c +++ b/src/mesa/vbo/vbo_save.c @@ -39,7 +39,6 @@ static void vbo_save_callback_init( struct gl_context *ctx ) ctx->Driver.EndList = vbo_save_EndList; ctx->Driver.BeginCallList = vbo_save_BeginCallList; ctx->Driver.EndCallList = vbo_save_EndCallList; - ctx->Driver.NotifySaveBegin = vbo_save_NotifyBegin; } diff --git a/src/mesa/vbo/vbo_save.h b/src/mesa/vbo/vbo_save.h index 465c314a930..ccfe5700978 100644 --- a/src/mesa/vbo/vbo_save.h +++ b/src/mesa/vbo/vbo_save.h @@ -179,7 +179,6 @@ void vbo_save_EndList( struct gl_context *ctx ); void vbo_save_NewList( struct gl_context *ctx, GLuint list, GLenum mode ); void vbo_save_EndCallList( struct gl_context *ctx ); void vbo_save_BeginCallList( struct gl_context *ctx, struct gl_display_list *list ); -GLboolean vbo_save_NotifyBegin( struct gl_context *ctx, GLenum mode ); void vbo_save_playback_vertex_list( struct gl_context *ctx, void *data ); diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c index 36cc11760f1..1a70d168c55 100644 --- a/src/mesa/vbo/vbo_save_api.c +++ b/src/mesa/vbo/vbo_save_api.c @@ -970,8 +970,7 @@ _save_CallLists(GLsizei n, GLenum type, const GLvoid * v) /** - * Called via ctx->Driver.NotifySaveBegin() when a glBegin is getting - * compiled into a display list. 
+ * Called when a glBegin is getting compiled into a display list. * Updating of ctx->Driver.CurrentSavePrimitive is already taken care of. */ GLboolean @@ -1604,8 +1603,6 @@ vbo_save_api_init(struct vbo_save_context *save) vbo_destroy_vertex_list, vbo_print_vertex_list); - ctx->Driver.NotifySaveBegin = vbo_save_NotifyBegin; - _save_vtxfmt_init(ctx); _save_current_init(ctx); _mesa_noop_vtxfmt_init(&save->vtxfmt_noop); -- cgit v1.2.3 From 55735cad007f15fb407f803a0416593997a2045e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.NewList Nothing overrides it. Reviewed-by: Brian Paul --- src/mesa/drivers/common/driverfuncs.c | 1 - src/mesa/main/dd.h | 7 ------- src/mesa/main/dlist.c | 2 +- src/mesa/vbo/vbo.h | 1 + src/mesa/vbo/vbo_save.c | 1 - src/mesa/vbo/vbo_save.h | 1 - 6 files changed, 2 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 1d6246a8281..cf43b155ddb 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -190,7 +190,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->LightingSpaceChange = NULL; /* display list */ - driver->NewList = NULL; driver->EndList = NULL; driver->BeginCallList = NULL; driver->EndCallList = NULL; diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 43dee9ae7d6..244aa9d9865 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -798,13 +798,6 @@ struct dd_function_table { */ void (*LightingSpaceChange)( struct gl_context *ctx ); - /** - * Called by glNewList(). - * - * Let the T&L component know what is going on with display lists - * in time to make changes to dispatch tables, etc. - */ - void (*NewList)( struct gl_context *ctx, GLuint list, GLenum mode ); /** * Called by glEndList(). 
* diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index f63efa1acc1..d3a329f8292 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -9028,7 +9028,7 @@ _mesa_NewList(GLuint name, GLenum mode) ctx->ListState.CurrentBlock = ctx->ListState.CurrentList->Head; ctx->ListState.CurrentPos = 0; - ctx->Driver.NewList(ctx, name, mode); + vbo_save_NewList(ctx, name, mode); ctx->CurrentDispatch = ctx->Save; _glapi_set_dispatch(ctx->CurrentDispatch); diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index c316a09f9ec..e6eba47901c 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -91,6 +91,7 @@ vbo_initialize_save_dispatch(const struct gl_context *ctx, void vbo_exec_FlushVertices(struct gl_context *ctx, GLuint flags); void vbo_save_SaveFlushVertices(struct gl_context *ctx); GLboolean vbo_save_NotifyBegin(struct gl_context *ctx, GLenum mode); +void vbo_save_NewList(struct gl_context *ctx, GLuint list, GLenum mode); typedef void (*vbo_draw_func)( struct gl_context *ctx, diff --git a/src/mesa/vbo/vbo_save.c b/src/mesa/vbo/vbo_save.c index bee6634cfec..26df1edc59e 100644 --- a/src/mesa/vbo/vbo_save.c +++ b/src/mesa/vbo/vbo_save.c @@ -35,7 +35,6 @@ static void vbo_save_callback_init( struct gl_context *ctx ) { - ctx->Driver.NewList = vbo_save_NewList; ctx->Driver.EndList = vbo_save_EndList; ctx->Driver.BeginCallList = vbo_save_BeginCallList; ctx->Driver.EndCallList = vbo_save_EndCallList; diff --git a/src/mesa/vbo/vbo_save.h b/src/mesa/vbo/vbo_save.h index ccfe5700978..65034ee5ef1 100644 --- a/src/mesa/vbo/vbo_save.h +++ b/src/mesa/vbo/vbo_save.h @@ -176,7 +176,6 @@ void vbo_loopback_vertex_list( struct gl_context *ctx, /* Callbacks: */ void vbo_save_EndList( struct gl_context *ctx ); -void vbo_save_NewList( struct gl_context *ctx, GLuint list, GLenum mode ); void vbo_save_EndCallList( struct gl_context *ctx ); void vbo_save_BeginCallList( struct gl_context *ctx, struct gl_display_list *list ); -- cgit v1.2.3 From 
f457964885afedaa47c1ee675c313650d1082473 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.EndList Nothing overrides it. Reviewed-by: Brian Paul --- src/mesa/drivers/common/driverfuncs.c | 1 - src/mesa/main/dd.h | 7 ------- src/mesa/main/dlist.c | 2 +- src/mesa/vbo/vbo.h | 1 + src/mesa/vbo/vbo_save.c | 1 - src/mesa/vbo/vbo_save.h | 1 - 6 files changed, 2 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index cf43b155ddb..1a823048d1d 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -190,7 +190,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->LightingSpaceChange = NULL; /* display list */ - driver->EndList = NULL; driver->BeginCallList = NULL; driver->EndCallList = NULL; diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 244aa9d9865..6e30484b1eb 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -798,13 +798,6 @@ struct dd_function_table { */ void (*LightingSpaceChange)( struct gl_context *ctx ); - /** - * Called by glEndList(). - * - * \sa dd_function_table::NewList. - */ - void (*EndList)( struct gl_context *ctx ); - /** * Called by glCallList(s). * diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index d3a329f8292..804583d0dea 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -9062,7 +9062,7 @@ _mesa_EndList(void) /* Call before emitting END_OF_LIST, in case the driver wants to * emit opcodes itself. 
*/ - ctx->Driver.EndList(ctx); + vbo_save_EndList(ctx); (void) alloc_instruction(ctx, OPCODE_END_OF_LIST, 0); diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index e6eba47901c..c4768bb1d33 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -92,6 +92,7 @@ void vbo_exec_FlushVertices(struct gl_context *ctx, GLuint flags); void vbo_save_SaveFlushVertices(struct gl_context *ctx); GLboolean vbo_save_NotifyBegin(struct gl_context *ctx, GLenum mode); void vbo_save_NewList(struct gl_context *ctx, GLuint list, GLenum mode); +void vbo_save_EndList(struct gl_context *ctx); typedef void (*vbo_draw_func)( struct gl_context *ctx, diff --git a/src/mesa/vbo/vbo_save.c b/src/mesa/vbo/vbo_save.c index 26df1edc59e..4a786967d39 100644 --- a/src/mesa/vbo/vbo_save.c +++ b/src/mesa/vbo/vbo_save.c @@ -35,7 +35,6 @@ static void vbo_save_callback_init( struct gl_context *ctx ) { - ctx->Driver.EndList = vbo_save_EndList; ctx->Driver.BeginCallList = vbo_save_BeginCallList; ctx->Driver.EndCallList = vbo_save_EndCallList; } diff --git a/src/mesa/vbo/vbo_save.h b/src/mesa/vbo/vbo_save.h index 65034ee5ef1..2cd969486f5 100644 --- a/src/mesa/vbo/vbo_save.h +++ b/src/mesa/vbo/vbo_save.h @@ -175,7 +175,6 @@ void vbo_loopback_vertex_list( struct gl_context *ctx, /* Callbacks: */ -void vbo_save_EndList( struct gl_context *ctx ); void vbo_save_EndCallList( struct gl_context *ctx ); void vbo_save_BeginCallList( struct gl_context *ctx, struct gl_display_list *list ); -- cgit v1.2.3 From ef6c0714af57d1aeaa9904fc4bb074e381ef928b Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.BeginCallList Nothing overrides it. 
Reviewed-by: Brian Paul --- src/mesa/drivers/common/driverfuncs.c | 1 - src/mesa/main/dd.h | 7 ------- src/mesa/main/dlist.c | 3 +-- src/mesa/vbo/vbo.h | 1 + src/mesa/vbo/vbo_save.c | 1 - src/mesa/vbo/vbo_save.h | 1 - 6 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 1a823048d1d..8da35b4aac7 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -190,7 +190,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->LightingSpaceChange = NULL; /* display list */ - driver->BeginCallList = NULL; driver->EndCallList = NULL; /* GL_ARB_texture_storage */ diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 6e30484b1eb..76def4c398c 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -798,13 +798,6 @@ struct dd_function_table { */ void (*LightingSpaceChange)( struct gl_context *ctx ); - /** - * Called by glCallList(s). - * - * Notify the T&L component before and after calling a display list. - */ - void (*BeginCallList)( struct gl_context *ctx, - struct gl_display_list *dlist ); /** * Called by glEndCallList(). 
* diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index 804583d0dea..af634a49080 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -7742,8 +7742,7 @@ execute_list(struct gl_context *ctx, GLuint list) ctx->ListState.CallDepth++; - if (ctx->Driver.BeginCallList) - ctx->Driver.BeginCallList(ctx, dlist); + vbo_save_BeginCallList(ctx, dlist); n = dlist->Head; diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index c4768bb1d33..07da3663b05 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -93,6 +93,7 @@ void vbo_save_SaveFlushVertices(struct gl_context *ctx); GLboolean vbo_save_NotifyBegin(struct gl_context *ctx, GLenum mode); void vbo_save_NewList(struct gl_context *ctx, GLuint list, GLenum mode); void vbo_save_EndList(struct gl_context *ctx); +void vbo_save_BeginCallList(struct gl_context *ctx, struct gl_display_list *list); typedef void (*vbo_draw_func)( struct gl_context *ctx, diff --git a/src/mesa/vbo/vbo_save.c b/src/mesa/vbo/vbo_save.c index 4a786967d39..07022b95789 100644 --- a/src/mesa/vbo/vbo_save.c +++ b/src/mesa/vbo/vbo_save.c @@ -35,7 +35,6 @@ static void vbo_save_callback_init( struct gl_context *ctx ) { - ctx->Driver.BeginCallList = vbo_save_BeginCallList; ctx->Driver.EndCallList = vbo_save_EndCallList; } diff --git a/src/mesa/vbo/vbo_save.h b/src/mesa/vbo/vbo_save.h index 2cd969486f5..699203ce8d6 100644 --- a/src/mesa/vbo/vbo_save.h +++ b/src/mesa/vbo/vbo_save.h @@ -176,7 +176,6 @@ void vbo_loopback_vertex_list( struct gl_context *ctx, /* Callbacks: */ void vbo_save_EndCallList( struct gl_context *ctx ); -void vbo_save_BeginCallList( struct gl_context *ctx, struct gl_display_list *list ); void vbo_save_playback_vertex_list( struct gl_context *ctx, void *data ); -- cgit v1.2.3 From 00f6beed02d644189b935b3cc9d70a6f993c034e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.EndCallList Nothing overrides it. 
Reviewed-by: Brian Paul --- src/mesa/drivers/common/driverfuncs.c | 3 --- src/mesa/main/dd.h | 7 ------- src/mesa/main/dlist.c | 3 +-- src/mesa/vbo/vbo.h | 1 + src/mesa/vbo/vbo_save.c | 8 -------- src/mesa/vbo/vbo_save.h | 2 -- 6 files changed, 2 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 8da35b4aac7..3d1fccb3ab4 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -189,9 +189,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->ProgramStringNotify = _tnl_program_string; driver->LightingSpaceChange = NULL; - /* display list */ - driver->EndCallList = NULL; - /* GL_ARB_texture_storage */ driver->AllocTextureStorage = _mesa_AllocTextureStorage_sw; diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 76def4c398c..0b04c8db23d 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -798,13 +798,6 @@ struct dd_function_table { */ void (*LightingSpaceChange)( struct gl_context *ctx ); - /** - * Called by glEndCallList(). - * - * \sa dd_function_table::BeginCallList. 
- */ - void (*EndCallList)( struct gl_context *ctx ); - /**@}*/ /** diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index af634a49080..e8059c7b260 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -8898,8 +8898,7 @@ execute_list(struct gl_context *ctx, GLuint list) } } - if (ctx->Driver.EndCallList) - ctx->Driver.EndCallList(ctx); + vbo_save_EndCallList(ctx); ctx->ListState.CallDepth--; } diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index 07da3663b05..00e843c9a0f 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -94,6 +94,7 @@ GLboolean vbo_save_NotifyBegin(struct gl_context *ctx, GLenum mode); void vbo_save_NewList(struct gl_context *ctx, GLuint list, GLenum mode); void vbo_save_EndList(struct gl_context *ctx); void vbo_save_BeginCallList(struct gl_context *ctx, struct gl_display_list *list); +void vbo_save_EndCallList(struct gl_context *ctx); typedef void (*vbo_draw_func)( struct gl_context *ctx, diff --git a/src/mesa/vbo/vbo_save.c b/src/mesa/vbo/vbo_save.c index 07022b95789..79603e9b32e 100644 --- a/src/mesa/vbo/vbo_save.c +++ b/src/mesa/vbo/vbo_save.c @@ -33,13 +33,6 @@ #include "vbo_context.h" -static void vbo_save_callback_init( struct gl_context *ctx ) -{ - ctx->Driver.EndCallList = vbo_save_EndCallList; -} - - - /** * Called at context creation time. 
*/ @@ -51,7 +44,6 @@ void vbo_save_init( struct gl_context *ctx ) save->ctx = ctx; vbo_save_api_init( save ); - vbo_save_callback_init(ctx); { struct gl_client_array *arrays = save->arrays; diff --git a/src/mesa/vbo/vbo_save.h b/src/mesa/vbo/vbo_save.h index 699203ce8d6..8032db8a9e0 100644 --- a/src/mesa/vbo/vbo_save.h +++ b/src/mesa/vbo/vbo_save.h @@ -175,8 +175,6 @@ void vbo_loopback_vertex_list( struct gl_context *ctx, /* Callbacks: */ -void vbo_save_EndCallList( struct gl_context *ctx ); - void vbo_save_playback_vertex_list( struct gl_context *ctx, void *data ); void vbo_save_api_init( struct vbo_save_context *save ); -- cgit v1.2.3 From 92709dcb9b7a09f9e5870a832c22197cde557fd4 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.DeleteSamplerObject Nothing overrides it. Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/main/dd.h | 2 -- src/mesa/main/samplerobj.c | 28 ++++++++++------------------ 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 0b04c8db23d..03cbff472b6 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -875,8 +875,6 @@ struct dd_function_table { */ struct gl_sampler_object * (*NewSamplerObject)(struct gl_context *ctx, GLuint name); - void (*DeleteSamplerObject)(struct gl_context *ctx, - struct gl_sampler_object *samp); /** * \name Return a timestamp in nanoseconds as defined by GL_ARB_timer_query. 
diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c index c7b96664c21..9bcba60fd6f 100644 --- a/src/mesa/main/samplerobj.c +++ b/src/mesa/main/samplerobj.c @@ -72,6 +72,14 @@ lookup_samplerobj_locked(struct gl_context *ctx, GLuint name) _mesa_HashLookupLocked(ctx->Shared->SamplerObjects, name); } +static void +delete_sampler_object(struct gl_context *ctx, + struct gl_sampler_object *sampObj) +{ + mtx_destroy(&sampObj->Mutex); + free(sampObj->Label); + free(sampObj); +} /** * Handle reference counting. @@ -94,10 +102,8 @@ _mesa_reference_sampler_object_(struct gl_context *ctx, deleteFlag = (oldSamp->RefCount == 0); mtx_unlock(&oldSamp->Mutex); - if (deleteFlag) { - assert(ctx->Driver.DeleteSamplerObject); - ctx->Driver.DeleteSamplerObject(ctx, oldSamp); - } + if (deleteFlag) + delete_sampler_object(ctx, oldSamp); *ptr = NULL; } @@ -162,19 +168,6 @@ _mesa_new_sampler_object(struct gl_context *ctx, GLuint name) return sampObj; } - -/** - * Fallback for ctx->Driver.DeleteSamplerObject(); - */ -static void -_mesa_delete_sampler_object(struct gl_context *ctx, - struct gl_sampler_object *sampObj) -{ - mtx_destroy(&sampObj->Mutex); - free(sampObj->Label); - free(sampObj); -} - static void create_samplers(struct gl_context *ctx, GLsizei count, GLuint *samplers, const char *caller) @@ -1626,5 +1619,4 @@ void _mesa_init_sampler_object_functions(struct dd_function_table *driver) { driver->NewSamplerObject = _mesa_new_sampler_object; - driver->DeleteSamplerObject = _mesa_delete_sampler_object; } -- cgit v1.2.3 From dd340b34f30e71db56f1a12768c7332b8224448c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:28:22 +0200 Subject: mesa: remove Driver.BindImageTexture Nothing sets it. 
Reviewed-by: Brian Paul Reviewed-by: Ian Romanick --- src/mesa/main/dd.h | 6 ------ src/mesa/main/shaderimage.c | 9 --------- 2 files changed, 15 deletions(-) diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 03cbff472b6..496a14f8dad 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -917,12 +917,6 @@ struct dd_function_table { * \name GL_ARB_shader_image_load_store interface. */ /** @{ */ - void (*BindImageTexture)(struct gl_context *ctx, - struct gl_image_unit *unit, - struct gl_texture_object *texObj, - GLint level, GLboolean layered, GLint layer, - GLenum access, GLenum format); - void (*MemoryBarrier)(struct gl_context *ctx, GLbitfield barriers); /** @} */ diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c index c4bba842ca7..bd4b7c7be3b 100644 --- a/src/mesa/main/shaderimage.c +++ b/src/mesa/main/shaderimage.c @@ -577,10 +577,6 @@ _mesa_BindImageTexture(GLuint unit, GLuint texture, GLint level, u->Layered = GL_FALSE; u->Layer = 0; } - - if (ctx->Driver.BindImageTexture) - ctx->Driver.BindImageTexture(ctx, u, u->TexObj, level, layered, - layer, access, format); } void GLAPIENTRY @@ -719,11 +715,6 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures) u->_ActualFormat = MESA_FORMAT_R_UNORM8; u->_Valid = GL_FALSE; } - - /* Pass the BindImageTexture call down to the device driver */ - if (ctx->Driver.BindImageTexture) - ctx->Driver.BindImageTexture(ctx, u, u->TexObj, u->Level, u->Layered, - u->Layer, u->Access, u->Format); } _mesa_end_texture_lookups(ctx); -- cgit v1.2.3 From d8932a355dfdd813f903b4f2bd6aab36ea66d14a Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 21:08:46 +0200 Subject: st/mesa: add ST_DEBUG=precompile support for tessellation shaders Reviewed-by: Ilia Mirkin --- src/mesa/state_tracker/st_program.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 
e62dd7aab80..9c271476dd2 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -1681,6 +1681,26 @@ st_precompile_shader_variant(struct st_context *st, break; } + case GL_TESS_CONTROL_PROGRAM_NV: { + struct st_tessctrl_program *p = (struct st_tessctrl_program *)prog; + struct st_tcp_variant_key key; + + memset(&key, 0, sizeof(key)); + key.st = st; + st_get_tcp_variant(st, p, &key); + break; + } + + case GL_TESS_EVALUATION_PROGRAM_NV: { + struct st_tesseval_program *p = (struct st_tesseval_program *)prog; + struct st_tep_variant_key key; + + memset(&key, 0, sizeof(key)); + key.st = st; + st_get_tep_variant(st, p, &key); + break; + } + + case GL_GEOMETRY_PROGRAM_NV: { struct st_geometry_program *p = (struct st_geometry_program *)prog; struct st_gp_variant_key key; -- cgit v1.2.3 From f3b37e321fe5ea8a8c0ff026636d69ce90437a6f Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 19:32:07 +0200 Subject: gallium: add per-sample interpolation control into rasterizer state Required by ARB_sample_shading for drivers that don't want a shader variant in st/mesa. 
Reviewed-by: Ilia Mirkin Acked-by: Roland Scheidegger --- src/gallium/docs/source/screen.rst | 9 +++++++++ src/gallium/drivers/freedreno/freedreno_screen.c | 1 + src/gallium/drivers/i915/i915_screen.c | 1 + src/gallium/drivers/ilo/ilo_screen.c | 1 + src/gallium/drivers/llvmpipe/lp_screen.c | 1 + src/gallium/drivers/nouveau/nv30/nv30_screen.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 1 + src/gallium/drivers/r300/r300_screen.c | 1 + src/gallium/drivers/r600/r600_pipe.c | 1 + src/gallium/drivers/radeonsi/si_pipe.c | 1 + src/gallium/drivers/softpipe/sp_screen.c | 1 + src/gallium/drivers/svga/svga_screen.c | 1 + src/gallium/drivers/vc4/vc4_screen.c | 1 + src/gallium/include/pipe/p_defines.h | 1 + src/gallium/include/pipe/p_state.h | 1 + 16 files changed, 24 insertions(+) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index e7800472f44..e08844b2f0b 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -268,6 +268,15 @@ The integer capabilities: bounds_max states of pipe_depth_stencil_alpha_state behave according to the GL_EXT_depth_bounds_test specification. * ``PIPE_CAP_TGSI_TXQS``: Whether the `TXQS` opcode is supported +* ``PIPE_CAP_FORCE_PERSAMPLE_INTERP``: If the driver can force per-sample + interpolation for all fragment shader inputs if + pipe_rasterizer_state::force_persample_interp is set. This is only used + by GL3-level sample shading (ARB_sample_shading). GL4-level sample shading + (ARB_gpu_shader5) doesn't use this. While GL3 hardware has a state for it, + GL4 hardware will likely need to emulate it with a shader variant, or by + selecting the interpolation weights with a conditional assignment + in the shader. + .. 
_pipe_capf: diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 9a684d4ffbb..0d0100590d6 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -235,6 +235,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; case PIPE_CAP_MAX_VIEWPORTS: diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 51c64edda22..9d6b3d39183 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -248,6 +248,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 9e37e24014a..76812a666a0 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -470,6 +470,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 697e3d97c19..50c3781f5f8 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -297,6 +297,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; } /* should only get 
here on unhandled cases */ diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index 806d4e6b04f..39267b354e3 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -170,6 +170,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_FLOAT_LINEAR: case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index c3bbc833f5b..6012ff6fcb7 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -215,6 +215,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 1909b914d02..32da76c88f6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -201,6 +201,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index e669ba2edde..1165ac8a9c0 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -196,6 +196,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case 
PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; /* SWTCL-only features. */ diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 36d7e68a8f2..efb4889e562 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -342,6 +342,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: case PIPE_CAP_DEPTH_BOUNDS_TEST: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; /* Stream output. */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index cdd33aa0831..a784db624e3 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -336,6 +336,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_VERTEXID_NOBASE: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index d8606f3c07e..d468cf4de54 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -247,6 +247,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; } /* should only get here on unhandled cases */ diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 44b6f4a4260..e0a28788238 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -380,6 +380,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case 
PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; } diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index c4b52e1e61d..739ac86193a 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -181,6 +181,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; /* Stream output. */ diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 47fa82a1e20..a4947154f17 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -631,6 +631,7 @@ enum pipe_cap PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR, PIPE_CAP_DEPTH_BOUNDS_TEST, PIPE_CAP_TGSI_TXQS, + PIPE_CAP_FORCE_PERSAMPLE_INTERP, }; #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0) diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 266ebbafe36..4bf8d46c686 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -103,6 +103,7 @@ struct pipe_rasterizer_state unsigned point_tri_clip:1; /** large points clipped as tris or points */ unsigned point_size_per_vertex:1; /**< size computed in vertex shader */ unsigned multisample:1; /* XXX maybe more ms state in future */ + unsigned force_persample_interp:1; unsigned line_smooth:1; unsigned line_stipple_enable:1; unsigned line_last_pixel:1; -- cgit v1.2.3 From 4e9fc7e4e2fa3b3c77d08c4db545dcc279e849e9 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 19:43:00 +0200 Subject: st/mesa: set force_persample_interp if ARB_sample_shading is used This is only a half of the work. The next patch will handle gl_SampleID/SamplePos, which is the other half of ARB_sample_shading. 
Reviewed-by: Ilia Mirkin --- src/mesa/state_tracker/st_atom_rasterizer.c | 8 ++++++++ src/mesa/state_tracker/st_atom_shader.c | 1 + src/mesa/state_tracker/st_context.c | 2 ++ src/mesa/state_tracker/st_context.h | 1 + 4 files changed, 12 insertions(+) diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index cceed42c828..0f01e9939de 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -237,6 +237,14 @@ static void update_raster_state( struct st_context *st ) /* _NEW_MULTISAMPLE */ raster->multisample = ctx->Multisample._Enabled; + /* _NEW_MULTISAMPLE | _NEW_BUFFERS */ + raster->force_persample_interp = + st->can_force_persample_interp && + ctx->Multisample._Enabled && + ctx->Multisample.SampleShading && + ctx->Multisample.MinSampleShadingValue * + ctx->DrawBuffer->Visual.samples > 1; + /* _NEW_SCISSOR */ raster->scissor = ctx->Scissor.EnableFlags; diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index fee15a980f3..dc0315698d8 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -72,6 +72,7 @@ update_fp( struct st_context *st ) /* Ignore sample qualifier while computing this flag. 
*/ key.persample_shading = + !st->can_force_persample_interp && _mesa_get_min_invocations_per_fragment(st->ctx, &stfp->Base, true) > 1; st->fp_variant = st_get_fp_variant(st, stfp, &key); diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index f65aafa9d55..a9ab5edcf49 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -237,6 +237,8 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, PIPE_BIND_SAMPLER_VIEW); st->prefer_blit_based_texture_transfer = screen->get_param(screen, PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER); + st->can_force_persample_interp = screen->get_param(screen, + PIPE_CAP_FORCE_PERSAMPLE_INTERP); st->needs_texcoord_semantic = screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD); diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 81d5480431a..a4cda29059d 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -98,6 +98,7 @@ struct st_context boolean has_etc1; boolean has_etc2; boolean prefer_blit_based_texture_transfer; + boolean can_force_persample_interp; boolean needs_texcoord_semantic; boolean apply_texture_swizzle_to_border_color; -- cgit v1.2.3 From 6b0f21cb287bde3acaba1b0d18ab1c291acf327f Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 27 Sep 2015 19:54:57 +0200 Subject: st/mesa: automatically set per-sample interpolation if using SampleID/Pos Reviewed-by: Ilia Mirkin --- src/mesa/state_tracker/st_atom_shader.c | 8 +++++++- src/mesa/state_tracker/st_program.c | 4 +++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index dc0315698d8..1e880a107c0 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -70,9 +70,15 @@ update_fp( struct st_context *st ) key.clamp_color = st->clamp_frag_color_in_shader && 
st->ctx->Color._ClampFragmentColor; - /* Ignore sample qualifier while computing this flag. */ + /* Don't set it if the driver can force the interpolation by itself. + * If SAMPLE_ID or SAMPLE_POS are used, the interpolation is set + * automatically. + * Ignore sample qualifier while computing this flag. + */ key.persample_shading = !st->can_force_persample_interp && + !(stfp->Base.Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID | + SYSTEM_BIT_SAMPLE_POS)) && _mesa_get_min_invocations_per_fragment(st->ctx, &stfp->Base, true) > 1; st->fp_variant = st_get_fp_variant(st, stfp, &key); diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 9c271476dd2..a07f8fec309 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -619,7 +619,9 @@ st_translate_fragment_program(struct st_context *st, else interpLocation[slot] = TGSI_INTERPOLATE_LOC_CENTER; - if (key->persample_shading) + if (stfp->Base.Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID | + SYSTEM_BIT_SAMPLE_POS) || + key->persample_shading) interpLocation[slot] = TGSI_INTERPOLATE_LOC_SAMPLE; switch (attr) { -- cgit v1.2.3 From 55d406b71ee96dc7ee2dc2f9dd7df3bd80957f5a Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 28 Sep 2015 21:44:54 +0200 Subject: tgsi/scan: add interpolation info into tgsi_shader_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer --- src/gallium/auxiliary/tgsi/tgsi_scan.c | 91 +++++++++++++++++++++++++++++++++- src/gallium/auxiliary/tgsi/tgsi_scan.h | 13 ++++- 2 files changed, 101 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 66306d7d5d2..00b07c877a8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -116,6 +116,53 @@ tgsi_scan_shader(const struct tgsi_token *tokens, break; } + if (fullinst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_CENTROID || + fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || + fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { + struct tgsi_full_src_register *src0 = &fullinst->Src[0]; + unsigned input; + + if (src0->Register.Indirect && src0->Indirect.ArrayID) + input = info->input_array_first[src0->Indirect.ArrayID]; + else + input = src0->Register.Index; + + /* For the INTERP opcodes, the interpolation is always + * PERSPECTIVE unless LINEAR is specified. + */ + switch (info->input_interpolate[input]) { + case TGSI_INTERPOLATE_COLOR: + case TGSI_INTERPOLATE_CONSTANT: + case TGSI_INTERPOLATE_PERSPECTIVE: + switch (fullinst->Instruction.Opcode) { + case TGSI_OPCODE_INTERP_CENTROID: + info->uses_persp_opcode_interp_centroid = true; + break; + case TGSI_OPCODE_INTERP_OFFSET: + info->uses_persp_opcode_interp_offset = true; + break; + case TGSI_OPCODE_INTERP_SAMPLE: + info->uses_persp_opcode_interp_sample = true; + break; + } + break; + + case TGSI_INTERPOLATE_LINEAR: + switch (fullinst->Instruction.Opcode) { + case TGSI_OPCODE_INTERP_CENTROID: + info->uses_linear_opcode_interp_centroid = true; + break; + case TGSI_OPCODE_INTERP_OFFSET: + info->uses_linear_opcode_interp_offset = true; + break; + case TGSI_OPCODE_INTERP_SAMPLE: + info->uses_linear_opcode_interp_sample = true; + break; + } + break; + } + } + if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D && fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG) info->uses_doubles = true; @@ -236,8 +283,48 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap; info->num_inputs++; - if (fulldecl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID) - info->uses_centroid = TRUE; + /* Only interpolated varyings. Don't include POSITION. + * Don't include integer varyings, because they are not + * interpolated. 
+ */ + if (semName == TGSI_SEMANTIC_GENERIC || + semName == TGSI_SEMANTIC_TEXCOORD || + semName == TGSI_SEMANTIC_COLOR || + semName == TGSI_SEMANTIC_BCOLOR || + semName == TGSI_SEMANTIC_FOG || + semName == TGSI_SEMANTIC_CLIPDIST || + semName == TGSI_SEMANTIC_CULLDIST) { + switch (fulldecl->Interp.Interpolate) { + case TGSI_INTERPOLATE_COLOR: + case TGSI_INTERPOLATE_PERSPECTIVE: + switch (fulldecl->Interp.Location) { + case TGSI_INTERPOLATE_LOC_CENTER: + info->uses_persp_center = true; + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + info->uses_persp_centroid = true; + break; + case TGSI_INTERPOLATE_LOC_SAMPLE: + info->uses_persp_sample = true; + break; + } + break; + case TGSI_INTERPOLATE_LINEAR: + switch (fulldecl->Interp.Location) { + case TGSI_INTERPOLATE_LOC_CENTER: + info->uses_linear_center = true; + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + info->uses_linear_centroid = true; + break; + case TGSI_INTERPOLATE_LOC_SAMPLE: + info->uses_linear_sample = true; + break; + } + break; + /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. */ + } + } if (semName == TGSI_SEMANTIC_PRIMID) info->uses_primid = TRUE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 42539ee9f45..3ceb55717ee 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -82,7 +82,18 @@ struct tgsi_shader_info boolean writes_stencil; /**< does fragment shader write stencil value? */ boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; /**< KILL or KILL_IF instruction used? 
*/ - boolean uses_centroid; + boolean uses_persp_center; + boolean uses_persp_centroid; + boolean uses_persp_sample; + boolean uses_linear_center; + boolean uses_linear_centroid; + boolean uses_linear_sample; + boolean uses_persp_opcode_interp_centroid; + boolean uses_persp_opcode_interp_offset; + boolean uses_persp_opcode_interp_sample; + boolean uses_linear_opcode_interp_centroid; + boolean uses_linear_opcode_interp_offset; + boolean uses_linear_opcode_interp_sample; boolean uses_instanceid; boolean uses_vertexid; boolean uses_vertexid_nobase; -- cgit v1.2.3 From 214de2d815360aa3986eb52a3b3060c33523f1b3 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 28 Sep 2015 17:01:21 +0200 Subject: radeonsi: move SPI_PS_INPUT_ENA/ADDR registers to a separate state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be a derived state used for changing center->sample and centroid->sample at runtime. Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_hw_context.c | 1 + src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_state.h | 1 + src/gallium/drivers/radeonsi/si_state_shaders.c | 40 ++++++++++++++++--------- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 17d89d16e24..7c147e2e44c 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -171,6 +171,7 @@ void si_begin_new_cs(struct si_context *ctx) si_mark_atom_dirty(ctx, &ctx->db_render_state); si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom); si_mark_atom_dirty(ctx, &ctx->spi_map); + si_mark_atom_dirty(ctx, &ctx->spi_ps_input); si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom); si_all_descriptors_begin_new_cs(ctx); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index a882d36e170..60e97015cce 100644 --- 
a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -188,6 +188,7 @@ struct si_context { struct si_viewports viewports; struct si_stencil_ref stencil_ref; struct r600_atom spi_map; + struct r600_atom spi_ps_input; /* Precomputed states. */ struct si_pm4_state *init_config; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 3fc0799c2b4..2257499f33c 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -123,6 +123,7 @@ union si_state_atoms { struct r600_atom *viewports; struct r600_atom *stencil_ref; struct r600_atom *spi_map; + struct r600_atom *spi_ps_input; } s; struct r600_atom *array[0]; }; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 31c0ab95464..43985330f6b 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -400,7 +400,7 @@ static void si_shader_ps(struct si_shader *shader) struct si_pm4_state *pm4; unsigned i, spi_ps_in_control; unsigned num_sgprs, num_user_sgprs; - unsigned spi_baryc_cntl = 0, spi_ps_input_ena; + unsigned spi_baryc_cntl = 0; uint64_t va; pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state); @@ -437,19 +437,6 @@ static void si_shader_ps(struct si_shader *shader) S_0286D8_BC_OPTIMIZE_DISABLE(1); si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl); - spi_ps_input_ena = shader->spi_ps_input_ena; - /* we need to enable at least one of them, otherwise we hang the GPU */ - assert(G_0286CC_PERSP_SAMPLE_ENA(spi_ps_input_ena) || - G_0286CC_PERSP_CENTER_ENA(spi_ps_input_ena) || - G_0286CC_PERSP_CENTROID_ENA(spi_ps_input_ena) || - G_0286CC_PERSP_PULL_MODEL_ENA(spi_ps_input_ena) || - G_0286CC_LINEAR_SAMPLE_ENA(spi_ps_input_ena) || - G_0286CC_LINEAR_CENTER_ENA(spi_ps_input_ena) || - G_0286CC_LINEAR_CENTROID_ENA(spi_ps_input_ena) || - G_0286CC_LINE_STIPPLE_TEX_ENA(spi_ps_input_ena)); 
- - si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, spi_ps_input_ena); - si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, spi_ps_input_ena); si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control); si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, shader->spi_shader_z_format); @@ -1064,6 +1051,27 @@ bcolor: assert(ps->nparam == num_written); } +static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom) +{ + struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; + struct si_shader *ps = sctx->ps_shader->current; + unsigned input_ena = ps->spi_ps_input_ena; + + /* we need to enable at least one of them, otherwise we hang the GPU */ + assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) || + G_0286CC_PERSP_CENTER_ENA(input_ena) || + G_0286CC_PERSP_CENTROID_ENA(input_ena) || + G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) || + G_0286CC_LINEAR_SAMPLE_ENA(input_ena) || + G_0286CC_LINEAR_CENTER_ENA(input_ena) || + G_0286CC_LINEAR_CENTROID_ENA(input_ena) || + G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena)); + + radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2); + radeon_emit(cs, input_ena); + radeon_emit(cs, input_ena); +} + /* Initialize state related to ESGS / GSVS ring buffers */ static void si_init_gs_rings(struct si_context *sctx) { @@ -1535,6 +1543,9 @@ bool si_update_shaders(struct si_context *sctx) si_mark_atom_dirty(sctx, &sctx->spi_map); } + if (si_pm4_state_changed(sctx, ps)) + si_mark_atom_dirty(sctx, &sctx->spi_ps_input); + if (si_pm4_state_changed(sctx, ls) || si_pm4_state_changed(sctx, hs) || si_pm4_state_changed(sctx, es) || @@ -1563,6 +1574,7 @@ bool si_update_shaders(struct si_context *sctx) void si_init_shader_functions(struct si_context *sctx) { si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map); + si_init_atom(sctx, &sctx->spi_ps_input, &sctx->atoms.s.spi_ps_input, si_emit_spi_ps_input); sctx->b.b.create_vs_state = si_create_vs_state; sctx->b.b.create_tcs_state = si_create_tcs_state; -- cgit v1.2.3 From 
9652bfcf2d2f3be5158ed88b49917bb5a2d8323d Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 28 Sep 2015 17:21:10 +0200 Subject: radeonsi: implement the simple case of force_persample_interp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_state.c | 1 + src/gallium/drivers/radeonsi/si_state.h | 1 + src/gallium/drivers/radeonsi/si_state_shaders.c | 35 ++++++++++++++++++++++++- 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 60e97015cce..3ff4b46251e 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -209,6 +209,7 @@ struct si_context { struct si_vertex_element *vertex_elements; unsigned sprite_coord_enable; bool flatshade; + bool force_persample_interp; /* shader descriptors */ struct si_descriptors vertex_buffers; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 85074bdbf5b..00d4bc1fbc2 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -686,6 +686,7 @@ static void *si_create_rs_state(struct pipe_context *ctx, rs->two_side = state->light_twoside; rs->multisample_enable = state->multisample; + rs->force_persample_interp = state->force_persample_interp; rs->clip_plane_enable = state->clip_plane_enable; rs->line_stipple_enable = state->line_stipple_enable; rs->poly_stipple_enable = state->poly_stipple_enable; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 2257499f33c..6a567688ee4 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -50,6 +50,7 @@ struct si_state_rasterizer { bool flatshade; bool two_side; bool multisample_enable; + bool force_persample_interp; bool 
line_stipple_enable; unsigned sprite_coord_enable; unsigned pa_sc_line_stipple; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 43985330f6b..aeb28797ace 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1067,6 +1067,36 @@ static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom G_0286CC_LINEAR_CENTROID_ENA(input_ena) || G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena)); + if (sctx->force_persample_interp) { + unsigned num_persp = G_0286CC_PERSP_SAMPLE_ENA(input_ena) + + G_0286CC_PERSP_CENTER_ENA(input_ena) + + G_0286CC_PERSP_CENTROID_ENA(input_ena); + unsigned num_linear = G_0286CC_LINEAR_SAMPLE_ENA(input_ena) + + G_0286CC_LINEAR_CENTER_ENA(input_ena) + + G_0286CC_LINEAR_CENTROID_ENA(input_ena); + + /* If only one set of (i,j) coordinates is used, we can disable + * CENTER/CENTROID, enable SAMPLE and it will load SAMPLE coordinates + * where CENTER/CENTROID are expected, effectively forcing per-sample + * interpolation. + */ + if (num_persp == 1) { + input_ena &= C_0286CC_PERSP_CENTER_ENA; + input_ena &= C_0286CC_PERSP_CENTROID_ENA; + input_ena |= G_0286CC_PERSP_SAMPLE_ENA(1); + } + if (num_linear == 1) { + input_ena &= C_0286CC_LINEAR_CENTER_ENA; + input_ena &= C_0286CC_LINEAR_CENTROID_ENA; + input_ena |= G_0286CC_LINEAR_SAMPLE_ENA(1); + } + + /* If at least 2 sets of coordinates are used, we can't use this + * trick and have to select SAMPLE using a conditional assignment + * in the shader with "force_persample_interp" being a shader constant. 
+ */ + } + radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2); radeon_emit(cs, input_ena); radeon_emit(cs, input_ena); @@ -1543,8 +1573,11 @@ bool si_update_shaders(struct si_context *sctx) si_mark_atom_dirty(sctx, &sctx->spi_map); } - if (si_pm4_state_changed(sctx, ps)) + if (si_pm4_state_changed(sctx, ps) || + sctx->force_persample_interp != rs->force_persample_interp) { + sctx->force_persample_interp = rs->force_persample_interp; si_mark_atom_dirty(sctx, &sctx->spi_ps_input); + } if (si_pm4_state_changed(sctx, ls) || si_pm4_state_changed(sctx, hs) || -- cgit v1.2.3 From b3c55fc669b54589e57a112df75094405e16ff52 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 28 Sep 2015 23:46:04 +0200 Subject: radeonsi: do force_persample_interp in shaders for non-trivial cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_shader.c | 53 ++++++++++++++++++++++++- src/gallium/drivers/radeonsi/si_shader.h | 49 ++++++++++++++--------- src/gallium/drivers/radeonsi/si_state_shaders.c | 34 ++++++++++++++++ 3 files changed, 117 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index a3df64824c6..32a702fcdf5 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -855,6 +855,56 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location) } } +/* This shouldn't be used by explicit INTERP opcodes. */ +static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx, + unsigned param) +{ + struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm; + unsigned sample_param = 0; + LLVMValueRef default_ij, sample_ij, force_sample; + + default_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, param); + + /* If the shader doesn't use center/centroid, just return the parameter. 
+ * + * If the shader only uses one set of (i,j), "si_emit_spi_ps_input" can + * switch between center/centroid and sample without shader changes. + */ + switch (param) { + case SI_PARAM_PERSP_CENTROID: + case SI_PARAM_PERSP_CENTER: + if (!si_shader_ctx->shader->selector->forces_persample_interp_for_persp) + return default_ij; + + sample_param = SI_PARAM_PERSP_SAMPLE; + break; + + case SI_PARAM_LINEAR_CENTROID: + case SI_PARAM_LINEAR_CENTER: + if (!si_shader_ctx->shader->selector->forces_persample_interp_for_linear) + return default_ij; + + sample_param = SI_PARAM_LINEAR_SAMPLE; + break; + + default: + return default_ij; + } + + /* Otherwise, we have to select (i,j) based on a user data SGPR. */ + sample_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, sample_param); + + /* TODO: this can be done more efficiently by switching between + * 2 prologs. + */ + force_sample = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, + SI_PARAM_PS_STATE_BITS); + force_sample = LLVMBuildTrunc(gallivm->builder, force_sample, + LLVMInt1TypeInContext(gallivm->context), ""); + return LLVMBuildSelect(gallivm->builder, force_sample, + sample_ij, default_ij, ""); +} + static void declare_input_fs( struct radeon_llvm_context *radeon_bld, unsigned input_index, @@ -925,7 +975,7 @@ static void declare_input_fs( if (interp_param_idx == -1) return; else if (interp_param_idx) - interp_param = LLVMGetParam(main_fn, interp_param_idx); + interp_param = get_interp_param(si_shader_ctx, interp_param_idx); /* fs.constant returns the param from the middle vertex, so it's not * really useful for flat shading. 
It's meant to be used for custom @@ -3458,6 +3508,7 @@ static void create_function(struct si_shader_context *si_shader_ctx) case TGSI_PROCESSOR_FRAGMENT: params[SI_PARAM_ALPHA_REF] = f32; + params[SI_PARAM_PS_STATE_BITS] = i32; params[SI_PARAM_PRIM_MASK] = i32; last_sgpr = SI_PARAM_PRIM_MASK; params[SI_PARAM_PERSP_SAMPLE] = v2i32; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 2305b9988b8..b92fa02a171 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -88,6 +88,7 @@ struct radeon_shader_reloc; #define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */ #define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */ #define SI_SGPR_ALPHA_REF 8 /* PS only */ +#define SI_SGPR_PS_STATE_BITS 9 /* PS only */ #define SI_VS_NUM_USER_SGPR 12 #define SI_LS_NUM_USER_SGPR 13 @@ -95,7 +96,7 @@ struct radeon_shader_reloc; #define SI_TES_NUM_USER_SGPR 10 #define SI_GS_NUM_USER_SGPR 8 #define SI_GSCOPY_NUM_USER_SGPR 4 -#define SI_PS_NUM_USER_SGPR 9 +#define SI_PS_NUM_USER_SGPR 10 /* LLVM function parameter indices */ #define SI_PARAM_RW_BUFFERS 0 @@ -148,23 +149,27 @@ struct radeon_shader_reloc; /* PS only parameters */ #define SI_PARAM_ALPHA_REF 4 -#define SI_PARAM_PRIM_MASK 5 -#define SI_PARAM_PERSP_SAMPLE 6 -#define SI_PARAM_PERSP_CENTER 7 -#define SI_PARAM_PERSP_CENTROID 8 -#define SI_PARAM_PERSP_PULL_MODEL 9 -#define SI_PARAM_LINEAR_SAMPLE 10 -#define SI_PARAM_LINEAR_CENTER 11 -#define SI_PARAM_LINEAR_CENTROID 12 -#define SI_PARAM_LINE_STIPPLE_TEX 13 -#define SI_PARAM_POS_X_FLOAT 14 -#define SI_PARAM_POS_Y_FLOAT 15 -#define SI_PARAM_POS_Z_FLOAT 16 -#define SI_PARAM_POS_W_FLOAT 17 -#define SI_PARAM_FRONT_FACE 18 -#define SI_PARAM_ANCILLARY 19 -#define SI_PARAM_SAMPLE_COVERAGE 20 -#define SI_PARAM_POS_FIXED_PT 21 +/* Bits: + * 0: force_persample_interp + */ +#define SI_PARAM_PS_STATE_BITS 5 +#define SI_PARAM_PRIM_MASK 6 +#define SI_PARAM_PERSP_SAMPLE 7 +#define SI_PARAM_PERSP_CENTER 8 
+#define SI_PARAM_PERSP_CENTROID 9 +#define SI_PARAM_PERSP_PULL_MODEL 10 +#define SI_PARAM_LINEAR_SAMPLE 11 +#define SI_PARAM_LINEAR_CENTER 12 +#define SI_PARAM_LINEAR_CENTROID 13 +#define SI_PARAM_LINE_STIPPLE_TEX 14 +#define SI_PARAM_POS_X_FLOAT 15 +#define SI_PARAM_POS_Y_FLOAT 16 +#define SI_PARAM_POS_Z_FLOAT 17 +#define SI_PARAM_POS_W_FLOAT 18 +#define SI_PARAM_FRONT_FACE 19 +#define SI_PARAM_ANCILLARY 20 +#define SI_PARAM_SAMPLE_COVERAGE 21 +#define SI_PARAM_POS_FIXED_PT 22 #define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1) @@ -182,6 +187,14 @@ struct si_shader_selector { /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */ unsigned type; + /* Whether the shader has to use a conditional assignment to + * choose between weights when emulating + * pipe_rasterizer_state::force_persample_interp. + * If false, "si_emit_spi_ps_input" will take care of it instead. + */ + bool forces_persample_interp_for_persp; + bool forces_persample_interp_for_linear; + unsigned gs_output_prim; unsigned gs_max_out_vertices; unsigned gs_num_invocations; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index aeb28797ace..77c585f958e 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -667,6 +667,34 @@ static void *si_create_shader_state(struct pipe_context *ctx, tgsi_scan_shader(state->tokens, &sel->info); p_atomic_inc(&sscreen->b.num_shaders_created); + /* First set which opcode uses which (i,j) pair. 
*/ + if (sel->info.uses_persp_opcode_interp_centroid) + sel->info.uses_persp_centroid = true; + + if (sel->info.uses_linear_opcode_interp_centroid) + sel->info.uses_linear_centroid = true; + + if (sel->info.uses_persp_opcode_interp_offset || + sel->info.uses_persp_opcode_interp_sample) + sel->info.uses_persp_center = true; + + if (sel->info.uses_linear_opcode_interp_offset || + sel->info.uses_linear_opcode_interp_sample) + sel->info.uses_linear_center = true; + + /* Determine if the shader has to use a conditional assignment when + * emulating force_persample_interp. + */ + sel->forces_persample_interp_for_persp = + sel->info.uses_persp_center + + sel->info.uses_persp_centroid + + sel->info.uses_persp_sample >= 2; + + sel->forces_persample_interp_for_linear = + sel->info.uses_linear_center + + sel->info.uses_linear_centroid + + sel->info.uses_linear_sample >= 2; + switch (pipe_shader_type) { case PIPE_SHADER_GEOMETRY: sel->gs_output_prim = @@ -1100,6 +1128,12 @@ static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2); radeon_emit(cs, input_ena); radeon_emit(cs, input_ena); + + if (ps->selector->forces_persample_interp_for_persp || + ps->selector->forces_persample_interp_for_linear) + radeon_set_sh_reg(cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 + + SI_SGPR_PS_STATE_BITS * 4, + sctx->force_persample_interp); } /* Initialize state related to ESGS / GSVS ring buffers */ -- cgit v1.2.3 From 814b7d1ab9e90567034e9601a420ed1be2970c15 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 28 Sep 2015 23:50:12 +0200 Subject: radeonsi: enable PIPE_CAP_FORCE_PERSAMPLE_INTERP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now st/mesa won't generate 2 variants for this state. 
Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index a784db624e3..a0283b7c966 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -295,6 +295,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 1; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: @@ -336,7 +337,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_VERTEXID_NOBASE: - case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: -- cgit v1.2.3 From 6994ca20aad110734f87ef5297fecd53e8f6e34e Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Sun, 4 Oct 2015 17:42:41 +1100 Subject: glsl: fix whitespace Reviewed-by: Iago Toral Quiroga --- src/glsl/ast_to_hir.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 7bcf1820b19..9511440ba3a 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -6357,7 +6357,7 @@ ast_interface_block::hir(exec_list *instructions, const glsl_type *block_array_type = process_array_type(&loc, block_type, this->array_specifier, state); - /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec: + /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec: * * * Arrays of arrays of blocks are not allowed */ -- cgit v1.2.3 From f2a4b40cf15cbc5eaab1776ad275ed8eead3322f Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 1 Oct 2015 00:36:25 -0700 Subject: nir/lower_io: Make get_io_offset() return a nir_ssa_def * for indirects. 
get_io_offset() already walks the dereference chain and discovers whether or not we have an indirect; we can just return that rather than computing it a second time via deref_has_indirect(). This means moving the call a bit earlier. By returning a nir_ssa_def *, we can pass back both an existence flag (via NULL checking the pointer) and the value in one parameter. It also simplifies the code somewhat. nir_lower_samplers works in a similar fashion. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir_lower_io.c | 62 +++++++++++++++------------------------------ 1 file changed, 20 insertions(+), 42 deletions(-) diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 30fad855e6f..b1cf7bec6d2 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -63,25 +63,12 @@ nir_assign_var_locations(struct exec_list *var_list, unsigned *size, *size = location; } -static bool -deref_has_indirect(nir_deref_var *deref) -{ - for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) { - if (tail->deref_type == nir_deref_type_array) { - nir_deref_array *arr = nir_deref_as_array(tail); - if (arr->deref_array_type == nir_deref_array_type_indirect) - return true; - } - } - - return false; -} - static unsigned -get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect, +get_io_offset(nir_deref_var *deref, nir_instr *instr, + nir_ssa_def **out_indirect, struct lower_io_state *state) { - bool found_indirect = false; + nir_ssa_def *indirect = NULL; unsigned base_offset = 0; nir_builder *b = &state->builder; @@ -103,14 +90,7 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect, nir_imul(b, nir_imm_int(b, size), nir_ssa_for_src(b, deref_array->indirect, 1)); - if (found_indirect) { - indirect->ssa = - nir_iadd(b, nir_ssa_for_src(b, *indirect, 1), mul); - } else { - indirect->ssa = mul; - } - indirect->is_ssa = true; - found_indirect = true; + indirect = indirect ? 
nir_iadd(b, indirect, mul) : mul; } } else if (tail->deref_type == nir_deref_type_struct) { nir_deref_struct *deref_struct = nir_deref_as_struct(tail); @@ -122,6 +102,7 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect, } } + *out_indirect = indirect; return base_offset; } @@ -169,17 +150,16 @@ nir_lower_io_block(nir_block *block, void *void_state) if (mode != nir_var_shader_in && mode != nir_var_uniform) continue; - bool has_indirect = deref_has_indirect(intrin->variables[0]); + nir_ssa_def *indirect; + + unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr, + &indirect, state); nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx, - load_op(mode, has_indirect)); + load_op(mode, indirect)); load->num_components = intrin->num_components; - nir_src indirect; - unsigned offset = get_io_offset(intrin->variables[0], - &intrin->instr, &indirect, state); - unsigned location = intrin->variables[0]->var->data.driver_location; if (mode == nir_var_uniform) { load->const_index[0] = location; @@ -188,8 +168,8 @@ nir_lower_io_block(nir_block *block, void *void_state) load->const_index[0] = location + offset; } - if (has_indirect) - load->src[0] = indirect; + if (indirect) + load->src[0] = nir_src_for_ssa(indirect); if (intrin->dest.is_ssa) { nir_ssa_dest_init(&load->instr, &load->dest, @@ -209,10 +189,14 @@ nir_lower_io_block(nir_block *block, void *void_state) if (intrin->variables[0]->var->data.mode != nir_var_shader_out) continue; - bool has_indirect = deref_has_indirect(intrin->variables[0]); + nir_ssa_def *indirect; + + unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr, + &indirect, state); + offset += intrin->variables[0]->var->data.driver_location; nir_intrinsic_op store_op; - if (has_indirect) { + if (indirect) { store_op = nir_intrinsic_store_output_indirect; } else { store_op = nir_intrinsic_store_output; @@ -221,18 +205,12 @@ nir_lower_io_block(nir_block *block, void *void_state) 
nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, store_op); store->num_components = intrin->num_components; - - nir_src indirect; - unsigned offset = get_io_offset(intrin->variables[0], - &intrin->instr, &indirect, state); - offset += intrin->variables[0]->var->data.driver_location; - store->const_index[0] = offset; nir_src_copy(&store->src[0], &intrin->src[0], store); - if (has_indirect) - store->src[1] = indirect; + if (indirect) + store->src[1] = nir_src_for_ssa(indirect); nir_instr_insert_before(&intrin->instr, &store->instr); nir_instr_remove(&intrin->instr); -- cgit v1.2.3 From 5d7f8cb5a511977e256e773716fac3415d01443e Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 1 Oct 2015 00:46:19 -0700 Subject: nir: Introduce new nir_intrinsic_load_per_vertex_input intrinsics. Geometry and tessellation shaders process multiple vertices; their inputs are arrays indexed by the vertex number. While GLSL makes this look like a normal array, it can be very different behind the scenes. On Intel hardware, all inputs for a particular vertex are stored together - as if they were grouped into a single struct. This means that consecutive elements of these top-level arrays are not contiguous. In fact, they may sometimes be in completely disjoint memory segments. NIR's existing load_input intrinsics are awkward for this case, as they distill everything down to a single offset. We'd much rather keep the vertex ID separate, but build up an offset as normal beyond that. This patch introduces new nir_intrinsic_load_per_vertex_input intrinsics to handle this case. They work like ordinary load_input intrinsics, but have an extra source (src[0]) which represents the outermost array index. v2: Rebase on earlier refactors. v3: Use ssa defs instead of nir_srcs, rebase on earlier refactors. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir_intrinsics.h | 1 + src/glsl/nir/nir_lower_io.c | 55 ++++++++++++++++++++++--- src/glsl/nir/nir_print.c | 2 + src/mesa/drivers/dri/i965/brw_nir.c | 13 +++++- src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp | 58 +++++++++++---------------- 5 files changed, 86 insertions(+), 43 deletions(-) diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index ac4c2ba0eb2..263d8c14f4a 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -228,6 +228,7 @@ SYSTEM_VALUE(num_work_groups, 3, 0) LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) /* diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index b1cf7bec6d2..688b48f4675 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -63,8 +63,20 @@ nir_assign_var_locations(struct exec_list *var_list, unsigned *size, *size = location; } +/** + * Returns true if we're processing a stage whose inputs are arrays indexed + * by a vertex number (such as geometry shader inputs). + */ +static bool +stage_uses_per_vertex_inputs(struct lower_io_state *state) +{ + gl_shader_stage stage = state->builder.shader->stage; + return stage == MESA_SHADER_GEOMETRY; +} + static unsigned get_io_offset(nir_deref_var *deref, nir_instr *instr, + nir_ssa_def **vertex_index, nir_ssa_def **out_indirect, struct lower_io_state *state) { @@ -75,6 +87,22 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, b->cursor = nir_before_instr(instr); nir_deref *tail = &deref->deref; + + /* For per-vertex input arrays (i.e. geometry shader inputs), keep the + * outermost array index separate. 
Process the rest normally. + */ + if (vertex_index != NULL) { + tail = tail->child; + assert(tail->deref_type == nir_deref_type_array); + nir_deref_array *deref_array = nir_deref_as_array(tail); + + nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset); + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1)); + } + *vertex_index = vtx; + } + while (tail->child != NULL) { const struct glsl_type *parent_type = tail->type; tail = tail->child; @@ -107,13 +135,19 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, } static nir_intrinsic_op -load_op(nir_variable_mode mode, bool has_indirect) +load_op(struct lower_io_state *state, + nir_variable_mode mode, bool per_vertex, bool has_indirect) { nir_intrinsic_op op; switch (mode) { case nir_var_shader_in: - op = has_indirect ? nir_intrinsic_load_input_indirect : - nir_intrinsic_load_input; + if (per_vertex) { + op = has_indirect ? nir_intrinsic_load_per_vertex_input_indirect : + nir_intrinsic_load_per_vertex_input; + } else { + op = has_indirect ? nir_intrinsic_load_input_indirect : + nir_intrinsic_load_input; + } break; case nir_var_uniform: op = has_indirect ? nir_intrinsic_load_uniform_indirect : @@ -150,14 +184,20 @@ nir_lower_io_block(nir_block *block, void *void_state) if (mode != nir_var_shader_in && mode != nir_var_uniform) continue; + bool per_vertex = stage_uses_per_vertex_inputs(state) && + mode == nir_var_shader_in; + nir_ssa_def *indirect; + nir_ssa_def *vertex_index; unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr, + per_vertex ? 
&vertex_index : NULL, &indirect, state); nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx, - load_op(mode, indirect)); + load_op(state, mode, per_vertex, + indirect)); load->num_components = intrin->num_components; unsigned location = intrin->variables[0]->var->data.driver_location; @@ -168,8 +208,11 @@ nir_lower_io_block(nir_block *block, void *void_state) load->const_index[0] = location + offset; } + if (per_vertex) + load->src[0] = nir_src_for_ssa(vertex_index); + if (indirect) - load->src[0] = nir_src_for_ssa(indirect); + load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(indirect); if (intrin->dest.is_ssa) { nir_ssa_dest_init(&load->instr, &load->dest, @@ -192,7 +235,7 @@ nir_lower_io_block(nir_block *block, void *void_state) nir_ssa_def *indirect; unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr, - &indirect, state); + NULL, &indirect, state); offset += intrin->variables[0]->var->data.driver_location; nir_intrinsic_op store_op; diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index 3936bae078b..09663996869 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -443,6 +443,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) break; case nir_intrinsic_load_input: case nir_intrinsic_load_input_indirect: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_per_vertex_input_indirect: var_list = &state->shader->inputs; break; case nir_intrinsic_store_output: diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 12f47ad0ded..80f36dc2399 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -30,8 +30,17 @@ static void brw_nir_lower_inputs(nir_shader *nir, bool is_scalar) { - nir_assign_var_locations(&nir->inputs, &nir->num_inputs, - is_scalar ? 
type_size_scalar : type_size_vec4); + switch (nir->stage) { + case MESA_SHADER_GEOMETRY: + foreach_list_typed(nir_variable, var, node, &nir->inputs) { + var->data.driver_location = var->data.location; + } + break; + default: + nir_assign_var_locations(&nir->inputs, &nir->num_inputs, + is_scalar ? type_size_scalar : type_size_vec4); + break; + } } static void diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp index af4c102c026..1b929b3df2c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp @@ -29,41 +29,6 @@ namespace brw { void vec4_gs_visitor::nir_setup_inputs() { - nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs); - - foreach_list_typed(nir_variable, var, node, &nir->inputs) { - int offset = var->data.driver_location; - if (var->type->base_type == GLSL_TYPE_ARRAY) { - /* Geometry shader inputs are arrays, but they use an unusual array - * layout: instead of all array elements for a given geometry shader - * input being stored consecutively, all geometry shader inputs are - * interleaved into one giant array. At this stage of compilation, we - * assume that the stride of the array is BRW_VARYING_SLOT_COUNT. - * Later, setup_attributes() will remap our accesses to the actual - * input array. 
- */ - assert(var->type->length > 0); - int length = var->type->length; - int size = type_size_vec4(var->type) / length; - for (int i = 0; i < length; i++) { - int location = var->data.location + i * BRW_VARYING_SLOT_COUNT; - for (int j = 0; j < size; j++) { - src_reg src = src_reg(ATTR, location + j, var->type); - src = retype(src, brw_type_for_base_type(var->type)); - nir_inputs[offset] = src; - offset++; - } - } - } else { - int size = type_size_vec4(var->type); - for (int i = 0; i < size; i++) { - src_reg src = src_reg(ATTR, var->data.location + i, var->type); - src = retype(src, brw_type_for_base_type(var->type)); - nir_inputs[offset] = src; - offset++; - } - } - } } void @@ -96,6 +61,29 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src_reg src; switch (instr->intrinsic) { + case nir_intrinsic_load_per_vertex_input_indirect: + assert(!"EmitNoIndirectInput should prevent this."); + case nir_intrinsic_load_per_vertex_input: { + /* The EmitNoIndirectInput flag guarantees our vertex index will + * be constant. We should handle indirects someday. + */ + nir_const_value *vertex = nir_src_as_const_value(instr->src[0]); + + /* Make up a type...we have no way of knowing... 
*/ + const glsl_type *const type = glsl_type::ivec(instr->num_components); + + src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u[0] + + instr->const_index[0], type); + dest = get_nir_dest(instr->dest, src.type); + dest.writemask = brw_writemask_for_size(instr->num_components); + emit(MOV(dest, src)); + break; + } + + case nir_intrinsic_load_input: + case nir_intrinsic_load_input_indirect: + unreachable("nir_lower_io should have produced per_vertex intrinsics"); + case nir_intrinsic_emit_vertex_with_counter: { this->vertex_count = retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD); -- cgit v1.2.3 From 7768b802e57253e6a91b908e9b855d23f0fd2d3f Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 2 Oct 2015 15:55:05 -0700 Subject: nir: Add a nir_shader_info::has_transform_feedback_varyings flag. Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/glsl/nir/glsl_to_nir.cpp | 2 ++ src/glsl/nir/nir.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 4dd62870065..efaa73e12f1 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -164,6 +164,8 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, shader->info.separate_shader = shader_prog->SeparateShader; shader->info.gs.vertices_out = sh->Geom.VerticesOut; shader->info.gs.invocations = sh->Geom.Invocations; + shader->info.has_transform_feedback_varyings = + shader_prog->TransformFeedback.NumVarying > 0; return shader; } diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 268fbc25a33..bde9f49a90c 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1487,6 +1487,9 @@ typedef struct nir_shader_info { /* Whether or not separate shader objects were used */ bool separate_shader; + /** Was this shader linked with any transform feedback varyings? */ + bool has_transform_feedback_varyings; + struct { /** The maximum number of vertices the geometry shader might write. 
*/ unsigned vertices_out; -- cgit v1.2.3 From 21585048a2e21ea9726e7f0b9f9fd98d1105295c Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 2 Oct 2015 16:45:09 -0700 Subject: i965: Use nir->has_transform_feedback_varyings to avoid shader_prog. Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index c673ccd137c..74ef7286721 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -473,7 +473,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) * be recorded by transform feedback, we can simply discard all geometry * bound to these streams when transform feedback is disabled. */ - if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0) + if (stream_id > 0 && !nir->info.has_transform_feedback_varyings) return; /* If we're outputting 32 control data bits or less, then we can wait -- cgit v1.2.3 From b85757bc72350df609f50e000512bc80d07f1497 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 2 Oct 2015 16:40:14 -0700 Subject: i965: Remove shader_prog from vec4_gs_visitor. Unfortunately it has to stay in gen6_gs_visitor. 
Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 6 ++---- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h | 3 --- src/mesa/drivers/dri/i965/gen6_gs_visitor.h | 9 +++++++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 74ef7286721..f6967a74d4e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -37,7 +37,6 @@ namespace brw { vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler, void *log_data, struct brw_gs_compile *c, - struct gl_shader_program *prog, nir_shader *shader, void *mem_ctx, bool no_spills, @@ -45,7 +44,6 @@ vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler, : vec4_visitor(compiler, log_data, &c->key.tex, &c->prog_data.base, shader, mem_ctx, no_spills, shader_time_index), - shader_prog(prog), c(c) { } @@ -641,7 +639,7 @@ brw_gs_emit(struct brw_context *brw, c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; vec4_gs_visitor v(brw->intelScreen->compiler, brw, - c, prog, shader->Program->nir, + c, shader->Program->nir, mem_ctx, true /* no_spills */, st_index); if (v.run()) { return generate_assembly(brw, prog, &c->gp->program.Base, @@ -684,7 +682,7 @@ brw_gs_emit(struct brw_context *brw, if (brw->gen >= 7) gs = new vec4_gs_visitor(brw->intelScreen->compiler, brw, - c, prog, shader->Program->nir, + c, shader->Program->nir, mem_ctx, false /* no_spills */, st_index); else diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h index 85d80b8fc63..da93f0dc1c6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h @@ -70,7 +70,6 @@ public: vec4_gs_visitor(const struct brw_compiler *compiler, void *log_data, struct brw_gs_compile *c, - struct 
gl_shader_program *prog, nir_shader *shader, void *mem_ctx, bool no_spills, @@ -97,8 +96,6 @@ protected: void emit_control_data_bits(); void set_stream_control_data_bits(unsigned stream_id); - struct gl_shader_program *shader_prog; - src_reg vertex_count; src_reg control_data_bits; const struct brw_gs_compile * const c; diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h index 41c6d183acd..e75d6aa10b8 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h @@ -43,8 +43,11 @@ public: void *mem_ctx, bool no_spills, int shader_time_index) : - vec4_gs_visitor(comp, log_data, c, prog, shader, mem_ctx, no_spills, - shader_time_index) {} + vec4_gs_visitor(comp, log_data, c, shader, mem_ctx, no_spills, + shader_time_index), + shader_prog(prog) + { + } protected: virtual void emit_prolog(); @@ -64,6 +67,8 @@ private: void xfb_setup(); int get_vertex_output_offset_for_varying(int vertex, int varying); + const struct gl_shader_program *shader_prog; + src_reg vertex_output; src_reg vertex_output_offset; src_reg temp; -- cgit v1.2.3 From 763cd8c080353482cb41da578cb3d6f7892a0c9f Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Wed, 30 Sep 2015 11:00:02 +1000 Subject: glsl: reduce memory footprint of uniform_storage struct The uniform will only be of a single type so store the data for opaque types in a single array. 
Cc: Francisco Jerez Cc: Ilia Mirkin --- src/glsl/ir_uniform.h | 6 +--- src/glsl/link_uniform_initializers.cpp | 12 +++---- src/glsl/link_uniforms.cpp | 41 +++++++++++------------- src/glsl/linker.cpp | 2 +- src/glsl/nir/nir_lower_samplers.c | 4 +-- src/glsl/tests/set_uniform_initializer_tests.cpp | 8 ++--- src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +- src/mesa/main/shaderapi.c | 2 +- src/mesa/main/uniform_query.cpp | 8 ++--- src/mesa/program/ir_to_mesa.cpp | 5 +-- src/mesa/program/sampler.cpp | 4 +-- 11 files changed, 43 insertions(+), 51 deletions(-) diff --git a/src/glsl/ir_uniform.h b/src/glsl/ir_uniform.h index 858a7da6bb9..50fe76b7ea2 100644 --- a/src/glsl/ir_uniform.h +++ b/src/glsl/ir_uniform.h @@ -110,11 +110,7 @@ struct gl_uniform_storage { */ bool initialized; - struct gl_opaque_uniform_index sampler[MESA_SHADER_STAGES]; - - struct gl_opaque_uniform_index image[MESA_SHADER_STAGES]; - - struct gl_opaque_uniform_index subroutine[MESA_SHADER_STAGES]; + struct gl_opaque_uniform_index opaque[MESA_SHADER_STAGES]; /** * Storage used by the driver for the uniform diff --git a/src/glsl/link_uniform_initializers.cpp b/src/glsl/link_uniform_initializers.cpp index 34830829b4a..0918d2af9b8 100644 --- a/src/glsl/link_uniform_initializers.cpp +++ b/src/glsl/link_uniform_initializers.cpp @@ -134,16 +134,16 @@ set_opaque_binding(gl_shader_program *prog, const char *name, int binding) if (shader) { if (storage->type->base_type == GLSL_TYPE_SAMPLER && - storage->sampler[sh].active) { + storage->opaque[sh].active) { for (unsigned i = 0; i < elements; i++) { - const unsigned index = storage->sampler[sh].index + i; + const unsigned index = storage->opaque[sh].index + i; shader->SamplerUnits[index] = storage->storage[i].i; } } else if (storage->type->base_type == GLSL_TYPE_IMAGE && - storage->image[sh].active) { + storage->opaque[sh].active) { for (unsigned i = 0; i < elements; i++) { - const unsigned index = storage->image[sh].index + i; + const unsigned index = 
storage->opaque[sh].index + i; shader->ImageUnits[index] = storage->storage[i].i; } } @@ -243,8 +243,8 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog, for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { gl_shader *shader = prog->_LinkedShaders[sh]; - if (shader && storage->sampler[sh].active) { - unsigned index = storage->sampler[sh].index; + if (shader && storage->opaque[sh].active) { + unsigned index = storage->opaque[sh].index; shader->SamplerUnits[index] = storage->storage[0].i; } diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp index 740b0a46aee..0642ddc4bf5 100644 --- a/src/glsl/link_uniforms.cpp +++ b/src/glsl/link_uniforms.cpp @@ -566,7 +566,7 @@ private: struct gl_uniform_storage *uniform, const char *name) { if (base_type->is_sampler()) { - uniform->sampler[shader_type].active = true; + uniform->opaque[shader_type].active = true; /* Handle multiple samplers inside struct arrays */ if (this->record_array_count > 1) { @@ -586,8 +586,8 @@ private: /* In this case, we've already seen this uniform so we just use * the next sampler index recorded the last time we visited. */ - uniform->sampler[shader_type].index = index; - index = inner_array_size + uniform->sampler[shader_type].index; + uniform->opaque[shader_type].index = index; + index = inner_array_size + uniform->opaque[shader_type].index; this->record_next_sampler->put(index, name_copy); ralloc_free(name_copy); @@ -605,13 +605,13 @@ private: * structs. This allows the offset to be easily calculated for * indirect indexing. 
*/ - uniform->sampler[shader_type].index = this->next_sampler; + uniform->opaque[shader_type].index = this->next_sampler; this->next_sampler += inner_array_size * this->record_array_count; /* Store the next index for future passes over the struct array */ - index = uniform->sampler[shader_type].index + inner_array_size; + index = uniform->opaque[shader_type].index + inner_array_size; this->record_next_sampler->put(index, name_copy); ralloc_free(name_copy); } @@ -619,22 +619,19 @@ private: /* Increment the sampler by 1 for non-arrays and by the number of * array elements for arrays. */ - uniform->sampler[shader_type].index = this->next_sampler; + uniform->opaque[shader_type].index = this->next_sampler; this->next_sampler += MAX2(1, uniform->array_elements); } const gl_texture_index target = base_type->sampler_index(); const unsigned shadow = base_type->sampler_shadow; - for (unsigned i = uniform->sampler[shader_type].index; + for (unsigned i = uniform->opaque[shader_type].index; i < MIN2(this->next_sampler, MAX_SAMPLERS); i++) { this->targets[i] = target; this->shader_samplers_used |= 1U << i; this->shader_shadow_samplers |= shadow << i; } - } else { - uniform->sampler[shader_type].index = ~0; - uniform->sampler[shader_type].active = false; } } @@ -642,17 +639,14 @@ private: struct gl_uniform_storage *uniform) { if (base_type->is_image()) { - uniform->image[shader_type].index = this->next_image; - uniform->image[shader_type].active = true; + uniform->opaque[shader_type].index = this->next_image; + uniform->opaque[shader_type].active = true; /* Increment the image index by 1 for non-arrays and by the * number of array elements for arrays. 
*/ this->next_image += MAX2(1, uniform->array_elements); - } else { - uniform->image[shader_type].index = ~0; - uniform->image[shader_type].active = false; } } @@ -660,17 +654,14 @@ private: struct gl_uniform_storage *uniform) { if (base_type->is_subroutine()) { - uniform->subroutine[shader_type].index = this->next_subroutine; - uniform->subroutine[shader_type].active = true; + uniform->opaque[shader_type].index = this->next_subroutine; + uniform->opaque[shader_type].active = true; /* Increment the subroutine index by 1 for non-arrays and by the * number of array elements for arrays. */ this->next_subroutine += MAX2(1, uniform->array_elements); - } else { - uniform->subroutine[shader_type].index = ~0; - uniform->subroutine[shader_type].active = false; } } @@ -738,6 +729,10 @@ private: base_type = type; } + /* Initialise opaque data */ + this->uniforms[id].opaque[shader_type].index = ~0; + this->uniforms[id].opaque[shader_type].active = false; + /* This assigns uniform indices to sampler and image uniforms. */ handle_samplers(base_type, &this->uniforms[id], name); handle_images(base_type, &this->uniforms[id]); @@ -1029,7 +1024,7 @@ link_set_image_access_qualifiers(struct gl_shader_program *prog) assert(found); (void) found; const gl_uniform_storage *storage = &prog->UniformStorage[id]; - const unsigned index = storage->image[i].index; + const unsigned index = storage->opaque[i].index; const GLenum access = (var->data.image_read_only ? GL_READ_ONLY : var->data.image_write_only ? GL_WRITE_ONLY : GL_READ_WRITE); @@ -1238,7 +1233,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog, if (!sh) continue; - if (!uniforms[i].subroutine[j].active) + if (!uniforms[i].opaque[j].active) continue; /* How many new entries for this uniform? 
*/ @@ -1268,7 +1263,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog, if (!sh) continue; - if (!uniforms[i].subroutine[j].active) + if (!uniforms[i].opaque[j].active) continue; sh->SubroutineUniformRemapTable = diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 826a1881baf..6df8d61cc44 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -3497,7 +3497,7 @@ build_program_resource_list(struct gl_shader_program *shProg) continue; for (int j = MESA_SHADER_VERTEX; j < MESA_SHADER_STAGES; j++) { - if (!shProg->UniformStorage[i].subroutine[j].active) + if (!shProg->UniformStorage[i].opaque[j].active) continue; type = _mesa_shader_stage_to_subroutine_uniform((gl_shader_stage)j); diff --git a/src/glsl/nir/nir_lower_samplers.c b/src/glsl/nir/nir_lower_samplers.c index 58ea0db4e0f..5df79a69a06 100644 --- a/src/glsl/nir/nir_lower_samplers.c +++ b/src/glsl/nir/nir_lower_samplers.c @@ -131,13 +131,13 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr } if (location > shader_program->NumUniformStorage - 1 || - !shader_program->UniformStorage[location].sampler[stage].active) { + !shader_program->UniformStorage[location].opaque[stage].active) { assert(!"cannot return a sampler"); return; } instr->sampler_index += - shader_program->UniformStorage[location].sampler[stage].index; + shader_program->UniformStorage[location].opaque[stage].index; instr->sampler = NULL; } diff --git a/src/glsl/tests/set_uniform_initializer_tests.cpp b/src/glsl/tests/set_uniform_initializer_tests.cpp index 91227d9487a..0b1f66cb342 100644 --- a/src/glsl/tests/set_uniform_initializer_tests.cpp +++ b/src/glsl/tests/set_uniform_initializer_tests.cpp @@ -117,8 +117,8 @@ establish_uniform_storage(struct gl_shader_program *prog, unsigned num_storage, prog->UniformStorage[index_to_set].array_elements = array_size; prog->UniformStorage[index_to_set].initialized = false; for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { - 
prog->UniformStorage[index_to_set].sampler[sh].index = ~0; - prog->UniformStorage[index_to_set].sampler[sh].active = false; + prog->UniformStorage[index_to_set].opaque[sh].index = ~0; + prog->UniformStorage[index_to_set].opaque[sh].active = false; } prog->UniformStorage[index_to_set].num_driver_storage = 0; prog->UniformStorage[index_to_set].driver_storage = NULL; @@ -138,8 +138,8 @@ establish_uniform_storage(struct gl_shader_program *prog, unsigned num_storage, prog->UniformStorage[i].array_elements = 0; prog->UniformStorage[i].initialized = false; for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { - prog->UniformStorage[i].sampler[sh].index = ~0; - prog->UniformStorage[i].sampler[sh].active = false; + prog->UniformStorage[i].opaque[sh].index = ~0; + prog->UniformStorage[i].opaque[sh].active = false; } prog->UniformStorage[i].num_driver_storage = 0; prog->UniformStorage[i].driver_storage = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index c8568f77304..3960e869421 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -1436,7 +1436,7 @@ brw_setup_image_uniform_values(gl_shader_stage stage, &stage_prog_data->param[param_start_index]; for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) { - const unsigned image_idx = storage->image[stage].index + i; + const unsigned image_idx = storage->opaque[stage].index + i; const brw_image_param *image_param = &stage_prog_data->image_param[image_idx]; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 13fdf8c033b..9dd1054c8ee 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -2597,7 +2597,7 @@ _mesa_GetUniformSubroutineuiv(GLenum shadertype, GLint location, { struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[location]; - int offset = location - uni->subroutine[stage].index; + int offset = location - uni->opaque[stage].index; memcpy(params, 
&uni->storage[offset], sizeof(GLuint)); } diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index 33c959dc1a5..d48729778ae 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -804,11 +804,11 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg, /* If the shader stage doesn't use the sampler uniform, skip this. */ - if (sh == NULL || !uni->sampler[i].active) + if (sh == NULL || !uni->opaque[i].active) continue; for (int j = 0; j < count; j++) { - sh->SamplerUnits[uni->sampler[i].index + offset + j] = + sh->SamplerUnits[uni->opaque[i].index + offset + j] = ((unsigned *) values)[j]; } @@ -850,11 +850,11 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg, */ if (uni->type->is_image()) { for (int i = 0; i < MESA_SHADER_STAGES; i++) { - if (uni->image[i].active) { + if (uni->opaque[i].active) { struct gl_shader *sh = shProg->_LinkedShaders[i]; for (int j = 0; j < count; j++) - sh->ImageUnits[uni->image[i].index + offset + j] = + sh->ImageUnits[uni->opaque[i].index + offset + j] = ((GLint *) values)[j]; } } diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 35ea791827a..e81f459da28 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2352,11 +2352,12 @@ add_uniform_to_shader::visit_field(const glsl_type *type, const char *name, struct gl_uniform_storage *storage = &this->shader_program->UniformStorage[location]; - assert(storage->sampler[shader_type].active); + assert(storage->type->is_sampler() && + storage->opaque[shader_type].active); for (unsigned int j = 0; j < size / 4; j++) params->ParameterValues[index + j][0].f = - storage->sampler[shader_type].index + j; + storage->opaque[shader_type].index + j; } } diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp index b1168fdade8..1198a3c45f1 100644 --- a/src/mesa/program/sampler.cpp +++ b/src/mesa/program/sampler.cpp @@ -119,7 
+119,7 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler, return 0; } - if (!shader_program->UniformStorage[location].sampler[shader].active) { + if (!shader_program->UniformStorage[location].opaque[shader].active) { assert(0 && "cannot return a sampler"); linker_error(shader_program, "cannot return a sampler named %s, because it is not " @@ -128,7 +128,7 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler, return 0; } - return shader_program->UniformStorage[location].sampler[shader].index + + return shader_program->UniformStorage[location].opaque[shader].index + getname.offset; } -- cgit v1.2.3 From 87c3c9acd29655d6c8778ea732f559eebeddd5f6 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Mon, 10 Aug 2015 18:44:18 +0900 Subject: st/dri: Use packed RGB formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes Gallium based DRI drivers failing to load on big endian hosts because they can't find any matching fbconfigs. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=71789 Signed-off-by: Michel Dänzer Reviewed-by: Marek Olšák Tested-by: Ilia Mirkin --- src/gallium/state_trackers/dri/dri2.c | 26 +++++++++++++------------- src/gallium/state_trackers/dri/dri_drawable.c | 8 ++++---- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c index 712203b9db9..019414b56fe 100644 --- a/src/gallium/state_trackers/dri/dri2.c +++ b/src/gallium/state_trackers/dri/dri2.c @@ -188,10 +188,10 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable, * may occur as the stvis->color_format. 
*/ switch(format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_BGRA8888_UNORM: depth = 32; break; - case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_BGRX8888_UNORM: depth = 24; break; case PIPE_FORMAT_B5G6R5_UNORM: @@ -261,13 +261,13 @@ dri_image_drawable_get_buffers(struct dri_drawable *drawable, case PIPE_FORMAT_B5G6R5_UNORM: image_format = __DRI_IMAGE_FORMAT_RGB565; break; - case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_BGRX8888_UNORM: image_format = __DRI_IMAGE_FORMAT_XRGB8888; break; - case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_BGRA8888_UNORM: image_format = __DRI_IMAGE_FORMAT_ARGB8888; break; - case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_RGBA8888_UNORM: image_format = __DRI_IMAGE_FORMAT_ABGR8888; break; default: @@ -314,10 +314,10 @@ dri2_allocate_buffer(__DRIscreen *sPriv, switch (format) { case 32: - pf = PIPE_FORMAT_B8G8R8A8_UNORM; + pf = PIPE_FORMAT_BGRA8888_UNORM; break; case 24: - pf = PIPE_FORMAT_B8G8R8X8_UNORM; + pf = PIPE_FORMAT_BGRX8888_UNORM; break; case 16: pf = PIPE_FORMAT_Z16_UNORM; @@ -724,13 +724,13 @@ dri2_create_image_from_winsys(__DRIscreen *_screen, pf = PIPE_FORMAT_B5G6R5_UNORM; break; case __DRI_IMAGE_FORMAT_XRGB8888: - pf = PIPE_FORMAT_B8G8R8X8_UNORM; + pf = PIPE_FORMAT_BGRX8888_UNORM; break; case __DRI_IMAGE_FORMAT_ARGB8888: - pf = PIPE_FORMAT_B8G8R8A8_UNORM; + pf = PIPE_FORMAT_BGRA8888_UNORM; break; case __DRI_IMAGE_FORMAT_ABGR8888: - pf = PIPE_FORMAT_R8G8B8A8_UNORM; + pf = PIPE_FORMAT_RGBA8888_UNORM; break; default: pf = PIPE_FORMAT_NONE; @@ -845,13 +845,13 @@ dri2_create_image(__DRIscreen *_screen, pf = PIPE_FORMAT_B5G6R5_UNORM; break; case __DRI_IMAGE_FORMAT_XRGB8888: - pf = PIPE_FORMAT_B8G8R8X8_UNORM; + pf = PIPE_FORMAT_BGRX8888_UNORM; break; case __DRI_IMAGE_FORMAT_ARGB8888: - pf = PIPE_FORMAT_B8G8R8A8_UNORM; + pf = PIPE_FORMAT_BGRA8888_UNORM; break; case __DRI_IMAGE_FORMAT_ABGR8888: - pf = PIPE_FORMAT_R8G8B8A8_UNORM; + pf = PIPE_FORMAT_RGBA8888_UNORM; break; default: pf = PIPE_FORMAT_NONE; 
diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index 0d2929aaaa1..f0cc4a2a3ef 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -231,11 +231,11 @@ dri_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target, if (format == __DRI_TEXTURE_FORMAT_RGB) { /* only need to cover the formats recognized by dri_fill_st_visual */ switch (internal_format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - internal_format = PIPE_FORMAT_B8G8R8X8_UNORM; + case PIPE_FORMAT_BGRA8888_UNORM: + internal_format = PIPE_FORMAT_BGRX8888_UNORM; break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - internal_format = PIPE_FORMAT_X8R8G8B8_UNORM; + case PIPE_FORMAT_ARGB8888_UNORM: + internal_format = PIPE_FORMAT_XRGB8888_UNORM; break; default: break; -- cgit v1.2.3 From 1fec05d1142921cdb5b01d94d886062c57905edc Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 4 Oct 2015 21:45:51 -0400 Subject: nv30: pretend to have packed texture/surface formats This puts us in line with what the DDX/DRI2 st are expecting. It also happens to work... no idea why, but seems better to have it work than to ask lots of questions. 
Signed-off-by: Ilia Mirkin Cc: mesa-stable@lists.freedesktop.org --- src/gallium/drivers/nouveau/nv30/nv30_format.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_format.c b/src/gallium/drivers/nouveau/nv30/nv30_format.c index 67e0d5e9c56..8d9516b863f 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_format.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_format.c @@ -78,12 +78,12 @@ nv30_format_info_table[PIPE_FORMAT_COUNT] = { _(B4G4R4X4_UNORM , S___), _(B4G4R4A4_UNORM , S___), _(B5G6R5_UNORM , SB__), - _(B8G8R8X8_UNORM , SB__), - _(B8G8R8X8_SRGB , S___), - _(B8G8R8A8_UNORM , SB__), - _(B8G8R8A8_SRGB , S___), + _(BGRX8888_UNORM , SB__), + _(BGRX8888_SRGB , S___), + _(BGRA8888_UNORM , SB__), + _(BGRA8888_SRGB , S___), _(R8G8B8A8_UNORM , __V_), - _(R8G8B8A8_SNORM , S___), + _(RGBA8888_SNORM , S___), _(DXT1_RGB , S___), _(DXT1_SRGB , S___), _(DXT1_RGBA , S___), @@ -138,8 +138,8 @@ const struct nv30_format nv30_format_table[PIPE_FORMAT_COUNT] = { R_(B5G5R5X1_UNORM , X1R5G5B5 ), R_(B5G6R5_UNORM , R5G6B5 ), - R_(B8G8R8X8_UNORM , X8R8G8B8 ), - R_(B8G8R8A8_UNORM , A8R8G8B8 ), + R_(BGRX8888_UNORM , X8R8G8B8 ), + R_(BGRA8888_UNORM , A8R8G8B8 ), Z_(Z16_UNORM , Z16 ), Z_(X8Z24_UNORM , Z24S8 ), Z_(S8_UINT_Z24_UNORM , Z24S8 ), @@ -223,11 +223,11 @@ nv30_texfmt_table[PIPE_FORMAT_COUNT] = { _(B4G4R4X4_UNORM , A4R4G4B4, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____), _(B4G4R4A4_UNORM , A4R4G4B4, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____), _(B5G6R5_UNORM , R5G6B5 , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____), - _(B8G8R8X8_UNORM , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____), - _(B8G8R8X8_SRGB , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____), - _(B8G8R8A8_UNORM , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____), - _(B8G8R8A8_SRGB , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____), - _(R8G8B8A8_SNORM , A8R8G8B8, 0, C, C, C, C, 0, 1, 2, 3, NONE, SSSS), + _(BGRX8888_UNORM , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, 
NONE, ____), + _(BGRX8888_SRGB , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____), + _(BGRA8888_UNORM , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____), + _(BGRA8888_SRGB , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____), + _(RGBA8888_SNORM , A8R8G8B8, 0, C, C, C, C, 0, 1, 2, 3, NONE, SSSS), _(DXT1_RGB , DXT1 , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____), _(DXT1_SRGB , DXT1 , 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____), _(DXT1_RGBA , DXT1 , 0, C, C, C, C, 2, 1, 0, 3, NONE, ____), -- cgit v1.2.3 From 78ec9e28ec759bcaf9781bcbd2b8e051f7df7896 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 4 Oct 2015 21:47:33 -0400 Subject: nv30: always go through translate module on big-endian It seems like things are either coming in slightly wrong, or perhaps uploaded incorrectly, but either way passing them through the translate module seems to fix everything. Eventually we should figure out what's going wrong and fix it "for real", but this should do for now. Signed-off-by: Ilia Mirkin Cc: mesa-stable@lists.freedesktop.org --- src/gallium/drivers/nouveau/nv30/nv30_vbo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c index 8494549e9b1..dec3a0bb856 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c @@ -191,7 +191,11 @@ nv30_vbo_validate(struct nv30_context *nv30) if (!nv30->vertex || nv30->draw_flags) return; +#ifdef PIPE_ARCH_BIG_ENDIAN + if (1) { /* Figure out where the buffers are getting messed up */ +#else if (unlikely(vertex->need_conversion)) { +#endif nv30->vbo_fifo = ~0; nv30->vbo_user = 0; } else { -- cgit v1.2.3 From cd7fa1034adc8891b094a0a52fbbbef0cc708e3d Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Sat, 26 Sep 2015 14:40:09 -0700 Subject: i965: Don't print line numbers with INTEL_DEBUG=optimizer. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The thing you want to do with the output files is diff them, which is made more difficult by line numbers changing. Reviewed-by: Alejandro Piñeiro --- src/mesa/drivers/dri/i965/brw_shader.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 3960e869421..a16e5958776 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -1307,13 +1307,15 @@ backend_shader::dump_instructions(const char *name) if (cfg) { int ip = 0; foreach_block_and_inst(block, backend_instruction, inst, cfg) { - fprintf(file, "%4d: ", ip++); + if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) + fprintf(file, "%4d: ", ip++); dump_instruction(inst, file); } } else { int ip = 0; foreach_in_list(backend_instruction, inst, &instructions) { - fprintf(file, "%4d: ", ip++); + if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) + fprintf(file, "%4d: ", ip++); dump_instruction(inst, file); } } -- cgit v1.2.3 From 4caa10193f6a88f476807aee56b900b3a02d9a6a Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Sat, 3 Oct 2015 10:43:26 -0700 Subject: i965/vec4: Remove more dead visitor/vertex program code. 
Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_vec4.h | 8 -------- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 10 ---------- src/mesa/drivers/dri/i965/brw_vs.h | 5 ----- 3 files changed, 23 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 51b3161f659..d1fa095a29f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -223,9 +223,6 @@ public: int implied_mrf_writes(vec4_instruction *inst); - void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst, - src_reg src0, src_reg src1, src_reg one); - vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, src_reg src0, src_reg src1); @@ -238,11 +235,6 @@ public: */ src_reg emit_uniformize(const src_reg &src); - /** - * Emit the correct dot-product instruction for the type of arguments - */ - void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements); - src_reg fix_3src_operand(const src_reg &src); src_reg resolve_source_modifiers(const src_reg &src); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 3e440366195..ca7c01876b7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -278,16 +278,6 @@ vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src, return inst; } -void -vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements) -{ - static enum opcode dot_opcodes[] = { - BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4 - }; - - emit(dot_opcodes[elements - 2], dst, src0, src1); -} - src_reg vec4_visitor::fix_3src_operand(const src_reg &src) { diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 96d2435a515..19551c9d550 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -105,16 +105,11 @@ protected: private: int 
setup_attributes(int payload_reg); - void setup_vp_regs(); void setup_uniform_clipplane_values(); void emit_clip_distances(dst_reg reg, int offset); - dst_reg get_vp_dst_reg(const prog_dst_register &dst); - src_reg get_vp_src_reg(const prog_src_register &src); const struct brw_vs_prog_key *const key; struct brw_vs_prog_data * const vs_prog_data; - src_reg *vp_temp_regs; - src_reg vp_addr_reg; gl_clip_plane *clip_planes; -- cgit v1.2.3 From 440f9348c1fb877910cebca5413c4300b6738428 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Wed, 30 Sep 2015 09:55:00 +0200 Subject: i965: Define BRW_MAX_UBO Instead of using hard-coded values. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.c | 4 ++-- src/mesa/drivers/dri/i965/brw_context.h | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 4d499295730..8fcba696c8e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -326,7 +326,7 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MaxUniformBlockSize = 65536; for (int i = 0; i < MESA_SHADER_STAGES; i++) { struct gl_program_constants *prog = &ctx->Const.Program[i]; - prog->MaxUniformBlocks = 12; + prog->MaxUniformBlocks = BRW_MAX_UBO; prog->MaxCombinedUniformComponents = prog->MaxUniformComponents + ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks; @@ -346,7 +346,7 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0; if (_mesa_extension_override_enables.ARB_compute_shader) { ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT; - ctx->Const.MaxUniformBufferBindings += 12; + ctx->Const.MaxUniformBufferBindings += BRW_MAX_UBO; } else { ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0; } diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h index 19a511710da..8fe88d2c85f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -708,6 +708,9 @@ struct brw_vs_prog_data { /** Max number of render targets in a shader */ #define BRW_MAX_DRAW_BUFFERS 8 +/** Max number of UBOs in a shader */ +#define BRW_MAX_UBO 12 + /** Max number of atomic counter buffer objects in a shader */ #define BRW_MAX_ABO 16 @@ -744,7 +747,7 @@ struct brw_vs_prog_data { #define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + \ BRW_MAX_TEX_UNIT * 2 + /* normal, gather */ \ - 12 + /* ubo */ \ + BRW_MAX_UBO + \ BRW_MAX_ABO + \ BRW_MAX_IMAGES + \ 2 + /* shader time, pull constants */ \ -- cgit v1.2.3 From 41c4d45e08b3bf948f24d007c9b7d0c47f3f89d8 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Wed, 30 Sep 2015 11:05:49 +0200 Subject: i965: Define BRW_MAX_SSBO Instead of using hard-coded values. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.c | 14 +++++++------- src/mesa/drivers/dri/i965/brw_context.h | 3 +++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 8fcba696c8e..87c84c6236b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -582,17 +582,17 @@ brw_initialize_context_constants(struct brw_context *brw) /* FIXME: Tessellation stages are not yet supported in i965, so * MaxCombinedShaderStorageBlocks doesn't take them into account. 
*/ - ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = 12; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = 12; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = BRW_MAX_SSBO; + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = BRW_MAX_SSBO; ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks = 0; ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks = 0; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = 12; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = 12; - ctx->Const.MaxCombinedShaderStorageBlocks = 12 * 3; - ctx->Const.MaxShaderStorageBufferBindings = 36; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = BRW_MAX_SSBO; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = BRW_MAX_SSBO; + ctx->Const.MaxCombinedShaderStorageBlocks = BRW_MAX_SSBO * 3; + ctx->Const.MaxShaderStorageBufferBindings = BRW_MAX_SSBO * 3; if (_mesa_extension_override_enables.ARB_compute_shader) - ctx->Const.MaxShaderStorageBufferBindings += 12; + ctx->Const.MaxShaderStorageBufferBindings += BRW_MAX_SSBO; if (brw->gen >= 6) { ctx->Const.MaxVarying = 32; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 8fe88d2c85f..0ffc262f289 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -711,6 +711,9 @@ struct brw_vs_prog_data { /** Max number of UBOs in a shader */ #define BRW_MAX_UBO 12 +/** Max number of SSBOs in a shader */ +#define BRW_MAX_SSBO 12 + /** Max number of atomic counter buffer objects in a shader */ #define BRW_MAX_ABO 16 -- cgit v1.2.3 From 20cbe3688aec2fd371fea096e6a0de0a38c2ae70 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Wed, 30 Sep 2015 11:06:30 +0200 Subject: i965: Reserve binding table space for SSBO surfaces These share the space with UBO surfaces but we need to make sure we allocate enough space for both 
sets (12 of each) Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 0ffc262f289..8efecf98218 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -751,6 +751,7 @@ struct brw_vs_prog_data { #define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + \ BRW_MAX_TEX_UNIT * 2 + /* normal, gather */ \ BRW_MAX_UBO + \ + BRW_MAX_SSBO + \ BRW_MAX_ABO + \ BRW_MAX_IMAGES + \ 2 + /* shader time, pull constants */ \ -- cgit v1.2.3 From 102f6c446b6585b00e1c1cd3c838f7b0899e49a9 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Wed, 30 Sep 2015 10:24:11 +0200 Subject: i965: Assert on the number of combined UBO and SSBO binding table entries In theory we can't break this assertion since the compiler frontend checks that we don't exceed any of the individual limits, but it does not hurt to be extra safe. 
Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h | 3 +++ src/mesa/drivers/dri/i965/brw_shader.cpp | 1 + 2 files changed, 4 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 8efecf98218..546f8fc35fe 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -714,6 +714,9 @@ struct brw_vs_prog_data { /** Max number of SSBOs in a shader */ #define BRW_MAX_SSBO 12 +/** Max number of combined UBOs and SSBOs in a shader */ +#define BRW_MAX_COMBINED_UBO_SSBO (BRW_MAX_UBO + BRW_MAX_SSBO) + /** Max number of atomic counter buffer objects in a shader */ #define BRW_MAX_ABO 16 diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index a16e5958776..7bc080b082e 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -1366,6 +1366,7 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage, next_binding_table_offset += num_textures; if (shader) { + assert(shader->NumUniformBlocks <= BRW_MAX_COMBINED_UBO_SSBO); stage_prog_data->binding_table.ubo_start = next_binding_table_offset; next_binding_table_offset += shader->NumUniformBlocks; } else { -- cgit v1.2.3 From a90feb581a1c0e63bf13aace5d11b6532e881313 Mon Sep 17 00:00:00 2001 From: Tapani Pälli Date: Mon, 5 Oct 2015 12:50:10 +0300 Subject: glsl: set glsl error if binding qualifier used on global scope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes following Piglit test: global-scope-binding-qualifier.frag Signed-off-by: Tapani Pälli Reviewed-by: Samuel Iglesias Gonsálvez --- src/glsl/glsl_parser.yy | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index f0abeb03215..c1bcccc34f4 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -2786,6 +2786,17 @@ layout_defaults: if 
(!state->default_shader_storage_qualifier->merge_qualifier(& @1, state, $1)) { YYERROR; } + + /* From the GLSL 4.50 spec, section 4.4.5: + * + * "It is a compile-time error to specify the binding identifier for + * the global scope or for block member declarations." + */ + if (state->default_shader_storage_qualifier->flags.q.explicit_binding) { + _mesa_glsl_error(& @1, state, + "binding qualifier cannot be set for default layout"); + } + $$ = NULL; } -- cgit v1.2.3 From 73e0dfbaca2fd334fd3505412bf0d38054affd25 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Mon, 5 Oct 2015 09:02:54 +0200 Subject: i965: Make vec4_visitor's destructor virtual We need a virtual destructor when at least one of the class' methods is virtual. Failure to do so might lead to undefined behavior when destructing derived classes. Fixes the following warning: brw_vec4_gs_visitor.cpp: In function 'const unsigned int* brw::brw_gs_emit(brw_context*, gl_shader_program*, brw_gs_compile*, void*, unsigned int*)': brw_vec4_gs_visitor.cpp:703:11: warning: deleting object of polymorphic class type 'brw::vec4_gs_visitor' which has non-virtual destructor might cause undefined behaviour [-Wdelete-non-virtual-dtor] delete gs; Curro: This shouldn't be causing any actual bugs at the moment because gen6_gs_visitor is the only subclass of vec4_visitor destroyed through a pointer of a base class (vec4_gs_visitor *) and its destructor is basically the same as its parent's. Anyway it seems sensible to change this so it doesn't bite us in the future. 
Reviewed-by: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_vec4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index d1fa095a29f..5e3500c0c9a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -76,7 +76,7 @@ public: void *mem_ctx, bool no_spills, int shader_time_index); - ~vec4_visitor(); + virtual ~vec4_visitor(); dst_reg dst_null_f() { -- cgit v1.2.3 From 125a04b474d4a07fec892e00fd56340e7d4ab03b Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 1 Oct 2015 07:31:31 -0700 Subject: i965/mt: Declare some functions as static intel_tiling_supports_non_msrt_mcs() and intel_miptree_is_fast_clear_capable() are not used outside of intel_mipmap_tree.c. Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 4 ++-- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index ffc356c9240..05dc291d847 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -160,7 +160,7 @@ intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt, } } -bool +static bool intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling) { /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render @@ -193,7 +193,7 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling) * - MCS buffer for non-MSRT is supported only for RT formats 32bpp, * 64bpp, and 128bpp. 
*/ -bool +static bool intel_miptree_is_fast_clear_capable(struct brw_context *brw, struct intel_mipmap_tree *mt) { diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 486e5c6f43b..805cd714d88 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -658,11 +658,7 @@ struct intel_mipmap_tree void intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt, unsigned *width_px, unsigned *height); -bool -intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling); -bool -intel_miptree_is_fast_clear_capable(struct brw_context *brw, - struct intel_mipmap_tree *mt); + bool intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, struct intel_mipmap_tree *mt); -- cgit v1.2.3 From 93161be9e7150ae5931000627833e714901cf195 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 1 Oct 2015 08:06:35 -0700 Subject: i965: Fix intel_miptree_is_fast_clear_capable() There are three types of fast clears: a. fast depth clears b. fast singlesample color clears c. fast multisample color clears Function intel_miptree_is_fast_clear_capable() checks if a miptree supports fast clears of type (b). Rename the function to disambiguate what it does: old: intel_miptree_is_fast_clear_capable new: intel_miptree_supports_non_msrt_fast_clear The function accidentally rejected multisampled color surfaces because it thought they were singlesample array surfaces. Fix that by explicitly rejecting surfaces with samples > 1. This fix would have been needed before we enabled layered fast singlesample color clears (introduced in gen8), which we want to do eventually. For now, though, this patch changes no behavior; it just fixes how the driver chooses its behavior. 
Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 05dc291d847..a169c41790e 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -194,8 +194,8 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling) * 64bpp, and 128bpp. */ static bool -intel_miptree_is_fast_clear_capable(struct brw_context *brw, - struct intel_mipmap_tree *mt) +intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, + struct intel_mipmap_tree *mt) { /* MCS support does not exist prior to Gen7 */ if (brw->gen < 7) @@ -204,6 +204,10 @@ intel_miptree_is_fast_clear_capable(struct brw_context *brw, if (mt->disable_aux_buffers) return false; + /* This function applies only to non-multisampled render targets. */ + if (mt->num_samples > 1) + return false; + /* MCS is only supported for color buffers */ switch (_mesa_get_format_base_format(mt->format)) { case GL_DEPTH_COMPONENT: @@ -222,7 +226,16 @@ intel_miptree_is_fast_clear_capable(struct brw_context *brw, return false; } + + /* Check for layered surfaces. */ if (mt->physical_depth0 != 1) { + /* Multisample surfaces with the CMS layout are not layered surfaces, + * yet still have physical_depth0 > 1. Assert that we don't + * accidentally reject a multisampled surface here. We should have + * rejected it earlier by explicitly checking the sample count. + */ + assert(mt->num_samples <= 1); + if (brw->gen >= 8) { perf_debug("Layered fast clear - giving up. (%dx%d%d)\n", mt->logical_width0, mt->logical_height0, @@ -494,7 +507,7 @@ intel_miptree_create_layout(struct brw_context *brw, * 7 | ? | ? * 6 | ? | ? 
*/ - if (intel_miptree_is_fast_clear_capable(brw, mt)) { + if (intel_miptree_supports_non_msrt_fast_clear(brw, mt)) { if (brw->gen >= 9 || (brw->gen == 8 && num_samples <= 1)) layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; } else if (brw->gen >= 9 && num_samples > 1) { @@ -692,7 +705,7 @@ intel_miptree_create(struct brw_context *brw, * clear actually occurs. */ if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) && - intel_miptree_is_fast_clear_capable(brw, mt)) { + intel_miptree_supports_non_msrt_fast_clear(brw, mt)) { mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; assert(brw->gen < 8 || mt->halign == 16 || num_samples <= 1); } @@ -800,8 +813,9 @@ intel_update_winsys_renderbuffer_miptree(struct brw_context *intel, * clear actually occurs. */ if (intel_tiling_supports_non_msrt_mcs(intel, singlesample_mt->tiling) && - intel_miptree_is_fast_clear_capable(intel, singlesample_mt)) + intel_miptree_supports_non_msrt_fast_clear(intel, singlesample_mt)) { singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; + } if (num_samples == 0) { intel_miptree_release(&irb->mt); -- cgit v1.2.3 From 36ea9922ada5ea99e54231697a4afb31d5f6b9bf Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 1 Oct 2015 10:06:55 -0700 Subject: mesa: Add missing _mm_mfence() before streaming loads. According to the Intel Software Development Manual (Volume 1: Basic Architecture, 12.10.3 Streaming Load Hint Instruction): Streaming loads may be weakly ordered and may appear to software to execute out of order with respect to other memory operations. Software must explicitly use fences (e.g. MFENCE) if it needs to preserve order among streaming loads or between streaming loads and other memory operations. That is, a memory fence is needed to preserve the order between the GPU writing the buffer and the streaming loads reading it back. 
Reported-by: Joseph Nuzman Reviewed-by: Jordan Justen --- src/mesa/main/streaming-load-memcpy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/main/streaming-load-memcpy.c b/src/mesa/main/streaming-load-memcpy.c index d7147afdc5c..32854b60eb2 100644 --- a/src/mesa/main/streaming-load-memcpy.c +++ b/src/mesa/main/streaming-load-memcpy.c @@ -59,6 +59,9 @@ _mesa_streaming_load_memcpy(void *restrict dst, void *restrict src, size_t len) len -= MIN2(bytes_before_alignment_boundary, len); } + if (len >= 64) + _mm_mfence(); + while (len >= 64) { __m128i *dst_cacheline = (__m128i *)d; __m128i *src_cacheline = (__m128i *)s; -- cgit v1.2.3 From 596441992130460c7d9a792e50eea46d27297d44 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Fri, 2 Oct 2015 20:18:34 -0700 Subject: i965/fs: Remove SNB embedded-comparison support from optimizations. We never emit IF instructions with an embedded comparison (lost in the switch to NIR), so this code is not used. If we want to readd support, we should have a pass that merges a CMP instruction with an IF or a WHILE instruction after other optimizations have run. Reviewed-by: Jason Ekstrand --- .../dri/i965/brw_fs_peephole_predicated_break.cpp | 14 ++------------ src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp | 21 +-------------------- 2 files changed, 3 insertions(+), 32 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp index b75f40ba5a1..8f7bd83ec70 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp @@ -83,18 +83,8 @@ fs_visitor::opt_peephole_predicated_break() bblock_t *if_block = jump_block->prev(); bblock_t *endif_block = jump_block->next(); - /* For Sandybridge with IF with embedded comparison we need to emit an - * instruction to set the flag register. 
- */ - if (devinfo->gen == 6 && if_inst->conditional_mod) { - const fs_builder ibld(this, if_block, if_inst); - ibld.CMP(ibld.null_reg_d(), if_inst->src[0], if_inst->src[1], - if_inst->conditional_mod); - jump_inst->predicate = BRW_PREDICATE_NORMAL; - } else { - jump_inst->predicate = if_inst->predicate; - jump_inst->predicate_inverse = if_inst->predicate_inverse; - } + jump_inst->predicate = if_inst->predicate; + jump_inst->predicate_inverse = if_inst->predicate_inverse; bblock_t *earlier_block = if_block; if (if_block->start_ip == if_block->end_ip) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp index d190d8eb6b4..8613725f6b9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp @@ -155,18 +155,6 @@ fs_visitor::opt_peephole_sel() if (movs == 0) continue; - enum brw_predicate predicate; - bool predicate_inverse; - if (devinfo->gen == 6 && if_inst->conditional_mod) { - /* For Sandybridge with IF with embedded comparison */ - predicate = BRW_PREDICATE_NORMAL; - predicate_inverse = false; - } else { - /* Separate CMP and IF instructions */ - predicate = if_inst->predicate; - predicate_inverse = if_inst->predicate_inverse; - } - /* Generate SEL instructions for pairs of MOVs to a common destination. 
*/ for (int i = 0; i < movs; i++) { if (!then_mov[i] || !else_mov[i]) @@ -195,13 +183,6 @@ fs_visitor::opt_peephole_sel() if (movs == 0) continue; - /* Emit a CMP if our IF used the embedded comparison */ - if (devinfo->gen == 6 && if_inst->conditional_mod) { - const fs_builder ibld(this, block, if_inst); - ibld.CMP(ibld.null_reg_d(), if_inst->src[0], if_inst->src[1], - if_inst->conditional_mod); - } - for (int i = 0; i < movs; i++) { const fs_builder ibld = fs_builder(this, then_block, then_mov[i]) .at(block, if_inst); @@ -220,7 +201,7 @@ fs_visitor::opt_peephole_sel() ibld.MOV(src0, then_mov[i]->src[0]); } - set_predicate_inv(predicate, predicate_inverse, + set_predicate_inv(if_inst->predicate, if_inst->predicate_inverse, ibld.SEL(then_mov[i]->dst, src0, else_mov[i]->src[0])); } -- cgit v1.2.3 From 4098a756b5590a460bdb0ee7d54cca81375f61e8 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Fri, 2 Oct 2015 20:23:35 -0700 Subject: i965/fs: Use backend_instruction in predicated break peephole. We're not using any fs_inst fields, and the next commit will make the peephole used by the vec4 backend. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp index 8f7bd83ec70..29f21680655 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp @@ -66,16 +66,16 @@ fs_visitor::opt_peephole_predicated_break() /* BREAK and CONTINUE instructions, by definition, can only be found at * the ends of basic blocks. 
*/ - fs_inst *jump_inst = (fs_inst *)block->end(); + backend_instruction *jump_inst = block->end(); if (jump_inst->opcode != BRW_OPCODE_BREAK && jump_inst->opcode != BRW_OPCODE_CONTINUE) continue; - fs_inst *if_inst = (fs_inst *)block->prev()->end(); + backend_instruction *if_inst = block->prev()->end(); if (if_inst->opcode != BRW_OPCODE_IF) continue; - fs_inst *endif_inst = (fs_inst *)block->next()->start(); + backend_instruction *endif_inst = block->next()->start(); if (endif_inst->opcode != BRW_OPCODE_ENDIF) continue; @@ -120,7 +120,7 @@ fs_visitor::opt_peephole_predicated_break() * the two basic blocks. */ bblock_t *while_block = earlier_block->next(); - fs_inst *while_inst = (fs_inst *)while_block->start(); + backend_instruction *while_inst = while_block->start(); if (jump_inst->opcode == BRW_OPCODE_BREAK && while_inst->opcode == BRW_OPCODE_WHILE && -- cgit v1.2.3 From 5a360dcad1fdb91f9129cb21775b9af60cbf57e4 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Fri, 2 Oct 2015 20:30:41 -0700 Subject: i965: Generalize predicated break pass for use in vec4 backend. 
instructions in affected programs: 44204 -> 43762 (-1.00%) helped: 221 Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/Makefile.sources | 2 +- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- .../dri/i965/brw_fs_peephole_predicated_break.cpp | 149 --------------------- src/mesa/drivers/dri/i965/brw_predicated_break.cpp | 148 ++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_shader.h | 6 +- src/mesa/drivers/dri/i965/brw_vec4.cpp | 1 + 6 files changed, 156 insertions(+), 152 deletions(-) delete mode 100644 src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp create mode 100644 src/mesa/drivers/dri/i965/brw_predicated_break.cpp diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index eb8196d4845..6f97f735add 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -55,7 +55,6 @@ i965_FILES = \ brw_fs_live_variables.cpp \ brw_fs_live_variables.h \ brw_fs_nir.cpp \ - brw_fs_peephole_predicated_break.cpp \ brw_fs_reg_allocate.cpp \ brw_fs_register_coalesce.cpp \ brw_fs_saturate_propagation.cpp \ @@ -91,6 +90,7 @@ i965_FILES = \ brw_packed_float.c \ brw_performance_monitor.c \ brw_pipe_control.c \ + brw_predicated_break.cpp \ brw_primitive_restart.c \ brw_program.c \ brw_program.h \ diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 03206882ebf..1187c6765cd 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -4823,7 +4823,7 @@ fs_visitor::optimize() OPT(opt_algebraic); OPT(opt_cse); OPT(opt_copy_propagate); - OPT(opt_peephole_predicated_break); + OPT(opt_predicated_break, this); OPT(opt_cmod_propagation); OPT(dead_code_eliminate); OPT(opt_peephole_sel); diff --git a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp deleted file mode 100644 index 29f21680655..00000000000 --- 
a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "brw_fs.h" -#include "brw_cfg.h" - -using namespace brw; - -/** @file brw_fs_peephole_predicated_break.cpp - * - * Loops are often structured as - * - * loop: - * CMP.f0 - * (+f0) IF - * BREAK - * ENDIF - * ... - * WHILE loop - * - * This peephole pass removes the IF and ENDIF instructions and predicates the - * BREAK, dropping two instructions from the loop body. - * - * If the loop was a DO { ... } WHILE loop, it looks like - * - * loop: - * ... - * CMP.f0 - * (+f0) IF - * BREAK - * ENDIF - * WHILE loop - * - * and we can remove the BREAK instruction and predicate the WHILE. 
- */ - -bool -fs_visitor::opt_peephole_predicated_break() -{ - bool progress = false; - - foreach_block (block, cfg) { - if (block->start_ip != block->end_ip) - continue; - - /* BREAK and CONTINUE instructions, by definition, can only be found at - * the ends of basic blocks. - */ - backend_instruction *jump_inst = block->end(); - if (jump_inst->opcode != BRW_OPCODE_BREAK && - jump_inst->opcode != BRW_OPCODE_CONTINUE) - continue; - - backend_instruction *if_inst = block->prev()->end(); - if (if_inst->opcode != BRW_OPCODE_IF) - continue; - - backend_instruction *endif_inst = block->next()->start(); - if (endif_inst->opcode != BRW_OPCODE_ENDIF) - continue; - - bblock_t *jump_block = block; - bblock_t *if_block = jump_block->prev(); - bblock_t *endif_block = jump_block->next(); - - jump_inst->predicate = if_inst->predicate; - jump_inst->predicate_inverse = if_inst->predicate_inverse; - - bblock_t *earlier_block = if_block; - if (if_block->start_ip == if_block->end_ip) { - earlier_block = if_block->prev(); - } - - if_inst->remove(if_block); - - bblock_t *later_block = endif_block; - if (endif_block->start_ip == endif_block->end_ip) { - later_block = endif_block->next(); - } - endif_inst->remove(endif_block); - - if (!earlier_block->ends_with_control_flow()) { - earlier_block->children.make_empty(); - earlier_block->add_successor(cfg->mem_ctx, jump_block); - } - - if (!later_block->starts_with_control_flow()) { - later_block->parents.make_empty(); - } - jump_block->add_successor(cfg->mem_ctx, later_block); - - if (earlier_block->can_combine_with(jump_block)) { - earlier_block->combine_with(jump_block); - - block = earlier_block; - } - - /* Now look at the first instruction of the block following the BREAK. If - * it's a WHILE, we can delete the break, predicate the WHILE, and join - * the two basic blocks. 
- */ - bblock_t *while_block = earlier_block->next(); - backend_instruction *while_inst = while_block->start(); - - if (jump_inst->opcode == BRW_OPCODE_BREAK && - while_inst->opcode == BRW_OPCODE_WHILE && - while_inst->predicate == BRW_PREDICATE_NONE) { - jump_inst->remove(earlier_block); - while_inst->predicate = jump_inst->predicate; - while_inst->predicate_inverse = !jump_inst->predicate_inverse; - - earlier_block->children.make_empty(); - earlier_block->add_successor(cfg->mem_ctx, while_block); - - assert(earlier_block->can_combine_with(while_block)); - earlier_block->combine_with(while_block); - - earlier_block->next()->parents.make_empty(); - earlier_block->add_successor(cfg->mem_ctx, earlier_block->next()); - } - - progress = true; - } - - if (progress) - invalidate_live_intervals(); - - return progress; -} diff --git a/src/mesa/drivers/dri/i965/brw_predicated_break.cpp b/src/mesa/drivers/dri/i965/brw_predicated_break.cpp new file mode 100644 index 00000000000..607715dace4 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_predicated_break.cpp @@ -0,0 +1,148 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_cfg.h" + +using namespace brw; + +/** @file brw_predicated_break.cpp + * + * Loops are often structured as + * + * loop: + * CMP.f0 + * (+f0) IF + * BREAK + * ENDIF + * ... + * WHILE loop + * + * This peephole pass removes the IF and ENDIF instructions and predicates the + * BREAK, dropping two instructions from the loop body. + * + * If the loop was a DO { ... } WHILE loop, it looks like + * + * loop: + * ... + * CMP.f0 + * (+f0) IF + * BREAK + * ENDIF + * WHILE loop + * + * and we can remove the BREAK instruction and predicate the WHILE. + */ + +bool +opt_predicated_break(backend_shader *s) +{ + bool progress = false; + + foreach_block (block, s->cfg) { + if (block->start_ip != block->end_ip) + continue; + + /* BREAK and CONTINUE instructions, by definition, can only be found at + * the ends of basic blocks. 
+ */ + backend_instruction *jump_inst = block->end(); + if (jump_inst->opcode != BRW_OPCODE_BREAK && + jump_inst->opcode != BRW_OPCODE_CONTINUE) + continue; + + backend_instruction *if_inst = block->prev()->end(); + if (if_inst->opcode != BRW_OPCODE_IF) + continue; + + backend_instruction *endif_inst = block->next()->start(); + if (endif_inst->opcode != BRW_OPCODE_ENDIF) + continue; + + bblock_t *jump_block = block; + bblock_t *if_block = jump_block->prev(); + bblock_t *endif_block = jump_block->next(); + + jump_inst->predicate = if_inst->predicate; + jump_inst->predicate_inverse = if_inst->predicate_inverse; + + bblock_t *earlier_block = if_block; + if (if_block->start_ip == if_block->end_ip) { + earlier_block = if_block->prev(); + } + + if_inst->remove(if_block); + + bblock_t *later_block = endif_block; + if (endif_block->start_ip == endif_block->end_ip) { + later_block = endif_block->next(); + } + endif_inst->remove(endif_block); + + if (!earlier_block->ends_with_control_flow()) { + earlier_block->children.make_empty(); + earlier_block->add_successor(s->cfg->mem_ctx, jump_block); + } + + if (!later_block->starts_with_control_flow()) { + later_block->parents.make_empty(); + } + jump_block->add_successor(s->cfg->mem_ctx, later_block); + + if (earlier_block->can_combine_with(jump_block)) { + earlier_block->combine_with(jump_block); + + block = earlier_block; + } + + /* Now look at the first instruction of the block following the BREAK. If + * it's a WHILE, we can delete the break, predicate the WHILE, and join + * the two basic blocks. 
+ */ + bblock_t *while_block = earlier_block->next(); + backend_instruction *while_inst = while_block->start(); + + if (jump_inst->opcode == BRW_OPCODE_BREAK && + while_inst->opcode == BRW_OPCODE_WHILE && + while_inst->predicate == BRW_PREDICATE_NONE) { + jump_inst->remove(earlier_block); + while_inst->predicate = jump_inst->predicate; + while_inst->predicate_inverse = !jump_inst->predicate_inverse; + + earlier_block->children.make_empty(); + earlier_block->add_successor(s->cfg->mem_ctx, while_block); + + assert(earlier_block->can_combine_with(while_block)); + earlier_block->combine_with(while_block); + + earlier_block->next()->parents.make_empty(); + earlier_block->add_successor(s->cfg->mem_ctx, earlier_block->next()); + } + + progress = true; + } + + if (progress) + s->invalidate_live_intervals(); + + return progress; +} diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index fd96740526b..b6c070ef4c7 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -219,7 +219,7 @@ enum instruction_scheduler_mode { SCHEDULE_POST, }; -class backend_shader { +struct backend_shader { protected: backend_shader(const struct brw_compiler *compiler, @@ -273,6 +273,8 @@ void brw_setup_image_uniform_values(gl_shader_stage stage, unsigned param_start_index, const gl_uniform_storage *storage); +#else +struct backend_shader; #endif /* __cplusplus */ enum brw_reg_type brw_type_for_base_type(const struct glsl_type *type); @@ -283,6 +285,8 @@ bool brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg); bool brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg); bool brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg); +bool opt_predicated_break(struct backend_shader *s); + #ifdef __cplusplus extern "C" { #endif diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 7e94cc3ef4a..76ce0c46198 100644 --- 
a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1862,6 +1862,7 @@ vec4_visitor::run() pass_num = 0; iteration++; + OPT(opt_predicated_break, this); OPT(opt_reduce_swizzle); OPT(dead_code_eliminate); OPT(dead_control_flow_eliminate, this); -- cgit v1.2.3 From 617eb5e6c3058730c118b9178b50f2ab82f6932d Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Wed, 23 Sep 2015 12:44:44 -0700 Subject: glsl: Remove CSE pass. With NIR, it actually hurts things. total instructions in shared programs: 6529329 -> 6528888 (-0.01%) instructions in affected programs: 14833 -> 14392 (-2.97%) helped: 299 HURT: 1 In all affected programs I inspected (including the single hurt one) the pass CSE'd some multiplies and caused some reassociation (e.g., caused (A * B) * C to be A * (B * C)) when the original intermediate result was reused elsewhere. Acked-by: Kenneth Graunke --- src/glsl/Makefile.sources | 1 - src/glsl/glsl_parser_extras.cpp | 1 - src/glsl/ir_optimization.h | 1 - src/glsl/opt_cse.cpp | 472 ---------------------------------------- 4 files changed, 475 deletions(-) delete mode 100644 src/glsl/opt_cse.cpp diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 32b6dba2e91..70832460af9 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -184,7 +184,6 @@ LIBGLSL_FILES = \ opt_constant_variable.cpp \ opt_copy_propagation.cpp \ opt_copy_propagation_elements.cpp \ - opt_cse.cpp \ opt_dead_builtin_variables.cpp \ opt_dead_builtin_varyings.cpp \ opt_dead_code.cpp \ diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index f5542415d4a..b521c5f3280 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -1899,7 +1899,6 @@ do_common_optimization(exec_list *ir, bool linked, progress = do_constant_variable_unlinked(ir) || progress; progress = do_constant_folding(ir) || progress; progress = do_minmax_prune(ir) || progress; - progress = do_cse(ir) || progress; 
progress = do_rebalance_tree(ir) || progress; progress = do_algebraic(ir, native_integers, options) || progress; progress = do_lower_jumps(ir) || progress; diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 265b2234cb6..ce5c4929bfb 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -87,7 +87,6 @@ bool do_constant_variable_unlinked(exec_list *instructions); bool do_copy_propagation(exec_list *instructions); bool do_copy_propagation_elements(exec_list *instructions); bool do_constant_propagation(exec_list *instructions); -bool do_cse(exec_list *instructions); void do_dead_builtin_varyings(struct gl_context *ctx, gl_shader *producer, gl_shader *consumer, unsigned num_tfeedback_decls, diff --git a/src/glsl/opt_cse.cpp b/src/glsl/opt_cse.cpp deleted file mode 100644 index 4b8e9a07ba5..00000000000 --- a/src/glsl/opt_cse.cpp +++ /dev/null @@ -1,472 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file opt_cse.cpp - * - * constant subexpression elimination at the GLSL IR level. - * - * Compare to brw_fs_cse.cpp for a more complete CSE implementation. This one - * is generic and handles texture operations, but it's rather simple currently - * and doesn't support modification of variables in the available expressions - * list, so it can't do variables other than uniforms or shader inputs. - */ - -#include "ir.h" -#include "ir_visitor.h" -#include "ir_rvalue_visitor.h" -#include "ir_basic_block.h" -#include "ir_optimization.h" -#include "ir_builder.h" -#include "glsl_types.h" - -using namespace ir_builder; - -static bool debug = false; - -namespace { - -/** - * This is the record of an available expression for common subexpression - * elimination. - */ -class ae_entry : public exec_node -{ -public: - ae_entry(ir_instruction *base_ir, ir_rvalue **val) - : val(val), base_ir(base_ir) - { - assert(val); - assert(*val); - assert(base_ir); - - var = NULL; - } - - void init(ir_instruction *base_ir, ir_rvalue **val) - { - this->val = val; - this->base_ir = base_ir; - this->var = NULL; - - assert(val); - assert(*val); - assert(base_ir); - } - - /** - * The pointer to the expression that we might be able to reuse - * - * Note the double pointer -- this is the place in the base_ir expression - * tree that we would rewrite to move the expression out to a new variable - * assignment. - */ - ir_rvalue **val; - - /** - * Root instruction in the basic block where the expression appeared. - * - * This is used so that we can insert the new variable declaration into the - * instruction stream (since *val is just somewhere in base_ir's expression - * tree). 
- */ - ir_instruction *base_ir; - - /** - * The variable that the expression has been stored in, if it's been CSEd - * once already. - */ - ir_variable *var; -}; - -class cse_visitor : public ir_rvalue_visitor { -public: - cse_visitor(exec_list *validate_instructions) - : validate_instructions(validate_instructions) - { - progress = false; - mem_ctx = ralloc_context(NULL); - this->ae = new(mem_ctx) exec_list; - } - ~cse_visitor() - { - ralloc_free(mem_ctx); - } - - virtual ir_visitor_status visit_enter(ir_function_signature *ir); - virtual ir_visitor_status visit_enter(ir_loop *ir); - virtual ir_visitor_status visit_enter(ir_if *ir); - virtual ir_visitor_status visit_enter(ir_call *ir); - virtual void handle_rvalue(ir_rvalue **rvalue); - - bool progress; - -private: - void *mem_ctx; - - ir_rvalue *try_cse(ir_rvalue *rvalue); - void add_to_ae(ir_rvalue **rvalue); - - /** - * Move all nodes from the ae list to the free list - */ - void empty_ae_list(); - - /** - * Get and initialize a new ae_entry - * - * This will either come from the free list or be freshly allocated. - */ - ae_entry *get_ae_entry(ir_rvalue **rvalue); - - /** List of ae_entry: The available expressions to reuse */ - exec_list *ae; - - /** - * The whole shader, so that we can validate_ir_tree in debug mode. - * - * This proved quite useful when trying to get the tree manipulation - * right. - */ - exec_list *validate_instructions; - - /** - * List of available-for-use ae_entry objects. - */ - exec_list free_ae_entries; -}; - -/** - * Visitor to walk an expression tree to check that all variables referenced - * are constants. 
- */ -class is_cse_candidate_visitor : public ir_hierarchical_visitor -{ -public: - - is_cse_candidate_visitor() - : ok(true) - { - } - - virtual ir_visitor_status visit(ir_dereference_variable *ir); - - bool ok; -}; - - -class contains_rvalue_visitor : public ir_rvalue_visitor -{ -public: - - contains_rvalue_visitor(ir_rvalue *val) - : val(val) - { - found = false; - } - - virtual void handle_rvalue(ir_rvalue **rvalue); - - bool found; - -private: - ir_rvalue *val; -}; - -} /* unnamed namespace */ - -static void -dump_ae(exec_list *ae) -{ - int i = 0; - - printf("CSE: AE contents:\n"); - foreach_in_list(ae_entry, entry, ae) { - printf("CSE: AE %2d (%p): ", i, entry); - (*entry->val)->print(); - printf("\n"); - - if (entry->var) - printf("CSE: in var %p:\n", entry->var); - - i++; - } -} - -ir_visitor_status -is_cse_candidate_visitor::visit(ir_dereference_variable *ir) -{ - /* Currently, since we don't handle kills of the ae based on variables - * getting assigned, we can only handle constant variables. - */ - if (ir->var->data.read_only) { - return visit_continue; - } else { - if (debug) - printf("CSE: non-candidate: var %s is not read only\n", ir->var->name); - ok = false; - return visit_stop; - } -} - -void -contains_rvalue_visitor::handle_rvalue(ir_rvalue **rvalue) -{ - if (*rvalue == val) - found = true; -} - -static bool -contains_rvalue(ir_rvalue *haystack, ir_rvalue *needle) -{ - contains_rvalue_visitor v(needle); - haystack->accept(&v); - return v.found; -} - -static bool -is_cse_candidate(ir_rvalue *ir) -{ - /* Our temporary variable assignment generation isn't ready to handle - * anything bigger than a vector. - */ - if (!ir->type->is_vector() && !ir->type->is_scalar()) { - if (debug) - printf("CSE: non-candidate: not a vector/scalar\n"); - return false; - } - - /* Only handle expressions and textures currently. We may want to extend - * to variable-index array dereferences at some point. 
- */ - switch (ir->ir_type) { - case ir_type_expression: - case ir_type_texture: - break; - default: - if (debug) - printf("CSE: non-candidate: not an expression/texture\n"); - return false; - } - - is_cse_candidate_visitor v; - - ir->accept(&v); - - return v.ok; -} - -/** - * Tries to find and return a reference to a previous computation of a given - * expression. - * - * Walk the list of available expressions checking if any of them match the - * rvalue, and if so, move the previous copy of the expression to a temporary - * and return a reference of the temporary. - */ -ir_rvalue * -cse_visitor::try_cse(ir_rvalue *rvalue) -{ - foreach_in_list(ae_entry, entry, ae) { - if (debug) { - printf("Comparing to AE %p: ", entry); - (*entry->val)->print(); - printf("\n"); - } - - if (!rvalue->equals(*entry->val)) - continue; - - if (debug) { - printf("CSE: Replacing: "); - (*entry->val)->print(); - printf("\n"); - printf("CSE: with: "); - rvalue->print(); - printf("\n"); - } - - if (!entry->var) { - ir_instruction *base_ir = entry->base_ir; - - ir_variable *var = new(rvalue) ir_variable(rvalue->type, - "cse", - ir_var_temporary); - - /* Write the previous expression result into a new variable. */ - base_ir->insert_before(var); - ir_assignment *assignment = assign(var, *entry->val); - base_ir->insert_before(assignment); - - /* Replace the expression in the original tree with a deref of the - * variable, but keep tracking the expression for further reuse. - */ - *entry->val = new(rvalue) ir_dereference_variable(var); - entry->val = &assignment->rhs; - - entry->var = var; - - /* Update the base_irs in the AE list. 
We have to be sure that - * they're correct -- expressions from our base_ir that weren't moved - * need to stay in this base_ir (so that later consumption of them - * puts new variables between our new variable and our base_ir), but - * expressions from our base_ir that we *did* move need base_ir - * updated so that any further elimination from inside gets its new - * assignments put before our new assignment. - */ - foreach_in_list(ae_entry, fixup_entry, ae) { - if (contains_rvalue(assignment->rhs, *fixup_entry->val)) - fixup_entry->base_ir = assignment; - } - - if (debug) - dump_ae(ae); - } - - /* Replace the expression in our current tree with the variable. */ - return new(rvalue) ir_dereference_variable(entry->var); - } - - return NULL; -} - -void -cse_visitor::empty_ae_list() -{ - free_ae_entries.append_list(ae); -} - -ae_entry * -cse_visitor::get_ae_entry(ir_rvalue **rvalue) -{ - ae_entry *entry = (ae_entry *) free_ae_entries.pop_head(); - if (entry) { - entry->init(base_ir, rvalue); - } else { - entry = new(mem_ctx) ae_entry(base_ir, rvalue); - } - - return entry; -} - -/** Add the rvalue to the list of available expressions for CSE. 
*/ -void -cse_visitor::add_to_ae(ir_rvalue **rvalue) -{ - if (debug) { - printf("CSE: Add to AE: "); - (*rvalue)->print(); - printf("\n"); - } - - ae->push_tail(get_ae_entry(rvalue)); - - if (debug) - dump_ae(ae); -} - -void -cse_visitor::handle_rvalue(ir_rvalue **rvalue) -{ - if (!*rvalue) - return; - - if (debug) { - printf("CSE: handle_rvalue "); - (*rvalue)->print(); - printf("\n"); - } - - if (!is_cse_candidate(*rvalue)) - return; - - ir_rvalue *new_rvalue = try_cse(*rvalue); - if (new_rvalue) { - *rvalue = new_rvalue; - progress = true; - - if (debug) - validate_ir_tree(validate_instructions); - } else { - add_to_ae(rvalue); - } -} - -ir_visitor_status -cse_visitor::visit_enter(ir_if *ir) -{ - handle_rvalue(&ir->condition); - - empty_ae_list(); - visit_list_elements(this, &ir->then_instructions); - - empty_ae_list(); - visit_list_elements(this, &ir->else_instructions); - - empty_ae_list(); - return visit_continue_with_parent; -} - -ir_visitor_status -cse_visitor::visit_enter(ir_function_signature *ir) -{ - empty_ae_list(); - visit_list_elements(this, &ir->body); - - empty_ae_list(); - return visit_continue_with_parent; -} - -ir_visitor_status -cse_visitor::visit_enter(ir_loop *ir) -{ - empty_ae_list(); - visit_list_elements(this, &ir->body_instructions); - - empty_ae_list(); - return visit_continue_with_parent; -} - -ir_visitor_status -cse_visitor::visit_enter(ir_call *) -{ - /* Because call is an exec_list of ir_rvalues, handle_rvalue gets passed a - * pointer to the (ir_rvalue *) on the stack. Since we save those pointers - * in the AE list, we can't let handle_rvalue get called. - */ - return visit_continue_with_parent; -} - -/** - * Does a (uniform-value) constant subexpression elimination pass on the code - * present in the instruction stream. 
- */ -bool -do_cse(exec_list *instructions) -{ - cse_visitor v(instructions); - - visit_list_elements(&v, instructions); - - return v.progress; -} -- cgit v1.2.3 From d4ff638504acb31ed523c8c862f66a7483014cb7 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 29 Jun 2015 09:58:50 -0700 Subject: glx: Drop CRAY support. It couldn't have worked anyway. There were calls to undefined functions. Reviewed-by: Emil Velikov --- src/glx/packrender.h | 46 ------------------------------------------ src/glx/packsingle.h | 56 ---------------------------------------------------- 2 files changed, 102 deletions(-) diff --git a/src/glx/packrender.h b/src/glx/packrender.h index 4266d5cc67b..f8f38ca2ec5 100644 --- a/src/glx/packrender.h +++ b/src/glx/packrender.h @@ -157,7 +157,6 @@ #define __GLX_PUT_CHAR(offset,a) \ *((INT8 *) (pc + offset)) = a -#ifndef _CRAY #define __GLX_PUT_SHORT(offset,a) \ *((INT16 *) (pc + offset)) = a @@ -167,29 +166,6 @@ #define __GLX_PUT_FLOAT(offset,a) \ *((FLOAT32 *) (pc + offset)) = a -#else -#define __GLX_PUT_SHORT(offset,a) \ - { GLubyte *cp = (pc+offset); \ - int shift = (64-16) - ((int)(cp) >> (64-6)); \ - *(int *)cp = (*(int *)cp & ~(0xffff << shift)) | ((a & 0xffff) << shift); } - -#define __GLX_PUT_LONG(offset,a) \ - { GLubyte *cp = (pc+offset); \ - int shift = (64-32) - ((int)(cp) >> (64-6)); \ - *(int *)cp = (*(int *)cp & ~(0xffffffff << shift)) | ((a & 0xffffffff) << shift); } - -#define __GLX_PUT_FLOAT(offset,a) \ - gl_put_float((pc + offset),a) - -#define __GLX_PUT_DOUBLE(offset,a) \ - gl_put_double(pc + offset, a) - -extern void gl_put_float( /*GLubyte *, struct cray_single */ ); -extern void gl_put_double( /*GLubyte *, struct cray_double */ ); -#endif - -#ifndef _CRAY - #ifdef __GLX_ALIGN64 /* ** This can certainly be done better for a particular machine @@ -202,12 +178,9 @@ extern void gl_put_double( /*GLubyte *, struct cray_double */ ); *((FLOAT64 *) (pc + offset)) = a #endif -#endif - #define __GLX_PUT_CHAR_ARRAY(offset,a,alen) \ 
__GLX_MEM_COPY(pc + offset, a, alen * __GLX_SIZE_INT8) -#ifndef _CRAY #define __GLX_PUT_SHORT_ARRAY(offset,a,alen) \ __GLX_MEM_COPY(pc + offset, a, alen * __GLX_SIZE_INT16) @@ -220,24 +193,5 @@ extern void gl_put_double( /*GLubyte *, struct cray_double */ ); #define __GLX_PUT_DOUBLE_ARRAY(offset,a,alen) \ __GLX_MEM_COPY(pc + offset, a, alen * __GLX_SIZE_FLOAT64) -#else -#define __GLX_PUT_SHORT_ARRAY(offset,a,alen) \ - gl_put_short_array((GLubyte *)(pc + offset), a, alen * __GLX_SIZE_INT16) - -#define __GLX_PUT_LONG_ARRAY(offset,a,alen) \ - gl_put_long_array((GLubyte *)(pc + offset), (long *)a, alen * __GLX_SIZE_INT32) - -#define __GLX_PUT_FLOAT_ARRAY(offset,a,alen) \ - gl_put_float_array((GLubyte *)(pc + offset), (float *)a, alen * __GLX_SIZE_FLOAT32) - -#define __GLX_PUT_DOUBLE_ARRAY(offset,a,alen) \ - gl_put_double_array((GLubyte *)(pc + offset), (double *)a, alen * __GLX_SIZE_FLOAT64) - -extern gl_put_short_array(GLubyte *, short *, int); -extern gl_put_long_array(GLubyte *, long *, int); -extern gl_put_float_array(GLubyte *, float *, int); -extern gl_put_double_array(GLubyte *, double *, int); - -#endif /* _CRAY */ #endif /* !__GLX_packrender_h__ */ diff --git a/src/glx/packsingle.h b/src/glx/packsingle.h index 037265a7671..fddcbf157f8 100644 --- a/src/glx/packsingle.h +++ b/src/glx/packsingle.h @@ -83,7 +83,6 @@ #define __GLX_SINGLE_PUT_CHAR(offset,a) \ *((INT8 *) (pc + offset)) = a -#ifndef CRAY #define __GLX_SINGLE_PUT_SHORT(offset,a) \ *((INT16 *) (pc + offset)) = a @@ -93,21 +92,6 @@ #define __GLX_SINGLE_PUT_FLOAT(offset,a) \ *((FLOAT32 *) (pc + offset)) = a -#else -#define __GLX_SINGLE_PUT_SHORT(offset,a) \ - { GLubyte *cp = (pc+offset); \ - int shift = (64-16) - ((int)(cp) >> (64-6)); \ - *(int *)cp = (*(int *)cp & ~(0xffff << shift)) | ((a & 0xffff) << shift); } - -#define __GLX_SINGLE_PUT_LONG(offset,a) \ - { GLubyte *cp = (pc+offset); \ - int shift = (64-32) - ((int)(cp) >> (64-6)); \ - *(int *)cp = (*(int *)cp & ~(0xffffffff << shift)) | ((a & 
0xffffffff) << shift); } - -#define __GLX_SINGLE_PUT_FLOAT(offset,a) \ - gl_put_float(pc + offset, a) -#endif - /* Read support macros */ #define __GLX_SINGLE_READ_XREPLY() \ (void) _XReply(dpy, (xReply*) &reply, 0, False) @@ -118,7 +102,6 @@ #define __GLX_SINGLE_GET_SIZE(a) \ a = (GLint) reply.size -#ifndef _CRAY #define __GLX_SINGLE_GET_CHAR(p) \ *p = *(GLbyte *)&reply.pad3; @@ -131,31 +114,6 @@ #define __GLX_SINGLE_GET_FLOAT(p) \ *p = *(GLfloat *)&reply.pad3; -#else -#define __GLX_SINGLE_GET_CHAR(p) \ - *p = reply.pad3 >> 24; - -#define __GLX_SINGLE_GET_SHORT(p) \ - {int t = reply.pad3 >> 16; \ - *p = (t & 0x8000) ? (t | ~0xffff) : (t & 0xffff);} - -#define __GLX_SINGLE_GET_LONG(p) \ - {int t = reply.pad3; \ - *p = (t & 0x80000000) ? (t | ~0xffffffff) : (t & 0xffffffff);} - -#define PAD3OFFSET 16 -#define __GLX_SINGLE_GET_FLOAT(p) \ - *p = gl_ntoh_float((GLubyte *)&reply + PAD3OFFSET); - -#define __GLX_SINGLE_GET_DOUBLE(p) \ - *p = gl_ntoh_double((GLubyte *)&reply + PAD3OFFSET); - -extern float gl_ntoh_float(GLubyte *); -extern float gl_ntoh_double(GLubyte *); -#endif - -#ifndef _CRAY - #ifdef __GLX_ALIGN64 #define __GLX_SINGLE_GET_DOUBLE(p) \ __GLX_MEM_COPY(p, &reply.pad3, 8) @@ -164,8 +122,6 @@ extern float gl_ntoh_double(GLubyte *); *p = *(GLdouble *)&reply.pad3 #endif -#endif - /* Get an array of typed data */ #define __GLX_SINGLE_GET_VOID_ARRAY(a,alen) \ { \ @@ -192,22 +148,10 @@ extern float gl_ntoh_double(GLubyte *); #define __GLX_SINGLE_GET_LONG_ARRAY(a,alen) \ _XRead(dpy,(char *)a,alen*__GLX_SIZE_INT32); -#ifndef _CRAY #define __GLX_SINGLE_GET_FLOAT_ARRAY(a,alen) \ _XRead(dpy,(char *)a,alen*__GLX_SIZE_FLOAT32); #define __GLX_SINGLE_GET_DOUBLE_ARRAY(a,alen) \ _XRead(dpy,(char *)a,alen*__GLX_SIZE_FLOAT64); -#else -#define __GLX_SINGLE_GET_FLOAT_ARRAY(a,alen) \ - gl_get_float_array(dpy,a,alen); - -#define __GLX_SINGLE_GET_DOUBLE_ARRAY(a,alen) \ - gl_get_double_array(dpy, a, alen); - -extern void gl_get_float_array(Display * dpy, float *a, int alen); 
-extern void gl_get_double_array(Display * dpy, double *a, int alen); -#endif - #endif /* !__GLX_packsingle_h__ */ -- cgit v1.2.3 From 833fa9a8cd4e3ac447c473ecb0a35294ff5f1e65 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Fri, 14 Aug 2015 11:19:49 -0700 Subject: meta: Update comment about unsupported texture types. Ken added support for 2DArray (commit ec23d5197e) and 1DArray (commit 14ca61125) last year. Reviewed-by: Anuj Phogat --- src/mesa/drivers/common/meta_generate_mipmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c index 5dc40a2aa33..4800278a467 100644 --- a/src/mesa/drivers/common/meta_generate_mipmap.c +++ b/src/mesa/drivers/common/meta_generate_mipmap.c @@ -150,8 +150,7 @@ prepare_mipmap_level(struct gl_context *ctx, /** * Called via ctx->Driver.GenerateMipmap() - * Note: We don't yet support 3D textures, 1D/2D array textures or texture - * borders. + * Note: We don't yet support 3D textures or texture borders.
*/ void _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, -- cgit v1.2.3 From dbae576f7f25fef72ca0b9f6f4822e0fddf7d607 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 19 Jun 2015 13:36:15 -0700 Subject: i965: add EXT_polygon_offset_clamp support to gen4/gen5 Reviewed-by: Matt Turner Signed-off-by: Ilia Mirkin --- src/mesa/drivers/dri/i965/brw_clip.c | 1 + src/mesa/drivers/dri/i965/brw_clip.h | 1 + src/mesa/drivers/dri/i965/brw_clip_unfilled.c | 14 ++++++++++++++ src/mesa/drivers/dri/i965/brw_context.h | 2 ++ src/mesa/drivers/dri/i965/brw_misc_state.c | 8 -------- src/mesa/drivers/dri/i965/brw_wm_state.c | 11 +++++++++++ src/mesa/drivers/dri/i965/intel_extensions.c | 2 +- 7 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 3a73c64a88b..2d5abc70cb9 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -223,6 +223,7 @@ brw_upload_clip_prog(struct brw_context *brw) /* _NEW_POLYGON, _NEW_BUFFERS */ key.offset_units = ctx->Polygon.OffsetUnits * ctx->DrawBuffer->_MRD * 2; key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD; + key.offset_clamp = ctx->Polygon.OffsetClamp * ctx->DrawBuffer->_MRD; } if (!ctx->Polygon._FrontBit) { diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h index 4e38f2f2ed6..54c76822e22 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.h +++ b/src/mesa/drivers/dri/i965/brw_clip.h @@ -62,6 +62,7 @@ struct brw_clip_prog_key { GLfloat offset_factor; GLfloat offset_units; + GLfloat offset_clamp; }; diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c index 6baf620a1a7..9a4d2a9d6f9 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c +++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c @@ -188,6 +188,12 @@ static void copy_bfc( struct brw_clip_compile *c ) GLfloat bc = dir.y * iz; offset = 
ctx->Polygon.OffsetUnits * DEPTH_SCALE; offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor; + if (ctx->Polygon.OffsetClamp && isfinite(ctx->Polygon.OffsetClamp)) { + if (ctx->Polygon.OffsetClamp < 0) + offset = MAX2( offset, ctx->Polygon.OffsetClamp ); + else + offset = MIN2( offset, ctx->Polygon.OffsetClamp ); + } offset *= MRD; */ static void compute_offset( struct brw_clip_compile *c ) @@ -211,6 +217,14 @@ static void compute_offset( struct brw_clip_compile *c ) brw_MUL(p, vec1(off), vec1(off), brw_imm_f(c->key.offset_factor)); brw_ADD(p, vec1(off), vec1(off), brw_imm_f(c->key.offset_units)); + if (c->key.offset_clamp && isfinite(c->key.offset_clamp)) { + brw_CMP(p, + vec1(brw_null_reg()), + c->key.offset_clamp < 0 ? BRW_CONDITIONAL_GE : BRW_CONDITIONAL_L, + vec1(off), + brw_imm_f(c->key.offset_clamp)); + brw_SEL(p, vec1(off), vec1(off), brw_imm_f(c->key.offset_clamp)); + } } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 546f8fc35fe..0a29a692016 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1457,6 +1457,8 @@ struct brw_context */ drm_intel_bo *multisampled_null_render_target_bo; uint32_t fast_clear_op; + + float offset_clamp; } wm; struct { diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 7d17edb9023..cf6ba5b4aeb 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -887,14 +887,6 @@ brw_upload_invariant_state(struct brw_context *brw) brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE); brw->last_pipeline = BRW_RENDER_PIPELINE; - if (brw->gen < 6) { - /* Disable depth offset clamping. 
*/ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2)); - OUT_BATCH_F(0.0); - ADVANCE_BATCH(); - } - if (brw->gen >= 8) { BEGIN_BATCH(3); OUT_BATCH(CMD_STATE_SIP << 16 | (3 - 2)); diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index cd0b56ba60c..ec54ef2acd9 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -31,6 +31,7 @@ +#include "intel_batchbuffer.h" #include "intel_fbo.h" #include "brw_context.h" #include "brw_state.h" @@ -251,6 +252,16 @@ brw_upload_wm_unit(struct brw_context *brw) } brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; + + /* _NEW_POLYGON */ + if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) { + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2)); + OUT_BATCH_F(ctx->Polygon.OffsetClamp); + ADVANCE_BATCH(); + + brw->wm.offset_clamp = ctx->Polygon.OffsetClamp; + } } const struct brw_tracked_state brw_wm_unit = { diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 3c77f4773c6..3f9afd16c71 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -229,6 +229,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.EXT_packed_float = true; ctx->Extensions.EXT_pixel_buffer_object = true; ctx->Extensions.EXT_point_parameters = true; + ctx->Extensions.EXT_polygon_offset_clamp = true; ctx->Extensions.EXT_provoking_vertex = true; ctx->Extensions.EXT_stencil_two_side = true; ctx->Extensions.EXT_texture_array = true; @@ -300,7 +301,6 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.AMD_vertex_shader_layer = true; ctx->Extensions.EXT_framebuffer_multisample = true; ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true; - ctx->Extensions.EXT_polygon_offset_clamp = true; ctx->Extensions.EXT_transform_feedback = true;
ctx->Extensions.OES_depth_texture_cube_map = true; -- cgit v1.2.3 From 64831832791139328a67b80387f062d39e304d24 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Sun, 4 Oct 2015 11:23:04 +1100 Subject: docs: Mark GL_ARB_enhanced_layouts as in progress --- docs/GL3.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index e020deb3b6c..e17e783d331 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -178,7 +178,13 @@ GL 4.4, GLSL 4.40: GL_MAX_VERTEX_ATTRIB_STRIDE DONE (all drivers) GL_ARB_buffer_storage DONE (i965, nv50, nvc0, r600, radeonsi) GL_ARB_clear_texture DONE (i965) (gallium - in progress, VMware) - GL_ARB_enhanced_layouts not started + GL_ARB_enhanced_layouts in progress (Timothy) + - compile-time constant expressions in progress + - explicit byte offsets for blocks in progress + - forced alignment within blocks in progress + - specified vec4-slot component numbers in progress + - specified transform/feedback layout in progress + - input/output block locations in progress GL_ARB_multi_bind DONE (all drivers) GL_ARB_query_buffer_object not started GL_ARB_texture_mirror_clamp_to_edge DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) -- cgit v1.2.3 From 82db642042585ddb54a2da44730b082062188d65 Mon Sep 17 00:00:00 2001 From: Samuel Iglesias Gonsalvez Date: Tue, 22 Sep 2015 16:25:46 +0200 Subject: glsl: add std430 layout support for AoA Signed-off-by: Samuel Iglesias Gonsalvez Reviewed-by: Timothy Arceri --- src/glsl/glsl_types.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index 25927f67c44..b9cb97cbeae 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -1557,8 +1557,8 @@ glsl_type::std430_size(bool row_major) const unsigned int array_len; if (this->is_array()) { - element_type = this->fields.array; - array_len = this->length; + element_type = this->without_array(); + array_len = 
this->arrays_of_arrays_size(); } else { element_type = this; array_len = 1; @@ -1581,10 +1581,12 @@ glsl_type::std430_size(bool row_major) const } if (this->is_array()) { - if (this->fields.array->is_record()) - return this->length * this->fields.array->std430_size(row_major); + if (this->without_array()->is_record()) + return this->arrays_of_arrays_size() * + this->without_array()->std430_size(row_major); else - return this->length * this->fields.array->std430_base_alignment(row_major); + return this->arrays_of_arrays_size() * + this->without_array()->std430_base_alignment(row_major); } if (this->is_record() || this->is_interface()) { -- cgit v1.2.3 From 50d5a36f3575bae38266e2445989c3b672ddbdbf Mon Sep 17 00:00:00 2001 From: Samuel Iglesias Gonsalvez Date: Mon, 5 Oct 2015 11:06:07 +0200 Subject: main: array stride for unsized arrays of arrays are calculated like records Signed-off-by: Samuel Iglesias Gonsalvez Reviewed-by: Timothy Arceri --- src/mesa/main/shader_query.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 718967605b5..6d73e3bdcf2 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -996,7 +996,7 @@ program_resource_top_level_array_stride(struct gl_shader_program *shProg, const glsl_type *array_type = field->type->fields.array; if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) { - if (array_type->is_record()) { + if (array_type->is_record() || array_type->is_array()) { array_stride = array_type->std140_size(row_major); array_stride = glsl_align(array_stride, 16); } else { -- cgit v1.2.3 From b7766a95e1b81e27624d91edf83dea91fb64a42e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 1 Oct 2015 13:12:57 -0600 Subject: glsl: whitespace/formatting/typo fixes in link_uniforms.cpp --- src/glsl/link_uniforms.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/glsl/link_uniforms.cpp 
b/src/glsl/link_uniforms.cpp index 0642ddc4bf5..0ccd9c8c865 100644 --- a/src/glsl/link_uniforms.cpp +++ b/src/glsl/link_uniforms.cpp @@ -739,7 +739,7 @@ private: handle_subroutines(base_type, &this->uniforms[id]); /* For array of arrays or struct arrays the base location may have - * already been set so dont set it again. + * already been set so don't set it again. */ if (ubo_block_index == -1 && current_var->data.location == -1) { current_var->data.location = id; @@ -764,7 +764,7 @@ private: this->explicit_location + field_counter; field_counter += entries; } else { - this->uniforms[id].remap_location = this->explicit_location; + this->uniforms[id].remap_location = this->explicit_location; } } else { /* Initialize to to indicate that no location is set */ @@ -815,12 +815,13 @@ private: if (type->without_array()->is_matrix()) { const glsl_type *matrix = type->without_array(); const unsigned N = matrix->base_type == GLSL_TYPE_DOUBLE ? 8 : 4; - const unsigned items = row_major ? matrix->matrix_columns : matrix->vector_elements; + const unsigned items = + row_major ? matrix->matrix_columns : matrix->vector_elements; assert(items <= 4); if (packing == GLSL_INTERFACE_PACKING_STD430) this->uniforms[id].matrix_stride = items < 3 ? 
items * N : - glsl_align(items * N, 16); + glsl_align(items * N, 16); else this->uniforms[id].matrix_stride = glsl_align(items * N, 16); this->uniforms[id].row_major = row_major; @@ -1154,7 +1155,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog, foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) { ir_variable *const var = node->as_variable(); - if ((var == NULL) || (var->data.mode != ir_var_uniform && var->data.mode != ir_var_shader_storage)) + if ((var == NULL) || (var->data.mode != ir_var_uniform && + var->data.mode != ir_var_shader_storage)) continue; parcel.set_and_process(prog, var); @@ -1163,7 +1165,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog, prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used; prog->_LinkedShaders[i]->shadow_samplers = parcel.shader_shadow_samplers; - STATIC_ASSERT(sizeof(prog->_LinkedShaders[i]->SamplerTargets) == sizeof(parcel.targets)); + STATIC_ASSERT(sizeof(prog->_LinkedShaders[i]->SamplerTargets) == + sizeof(parcel.targets)); memcpy(prog->_LinkedShaders[i]->SamplerTargets, parcel.targets, sizeof(prog->_LinkedShaders[i]->SamplerTargets)); } -- cgit v1.2.3 From 3801fa65c1ebb44d93cc6c5780906e0fa0d0b676 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 5 Oct 2015 07:44:36 -0600 Subject: tgsi: add const qualifier to silence warning Trivial. 
--- src/gallium/auxiliary/tgsi/tgsi_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 00b07c877a8..d76dddbf7d9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -119,7 +119,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID || fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { - struct tgsi_full_src_register *src0 = &fullinst->Src[0]; + const struct tgsi_full_src_register *src0 = &fullinst->Src[0]; unsigned input; if (src0->Register.Indirect && src0->Indirect.ArrayID) -- cgit v1.2.3 From fad5fd3a254c9c37bc25cd356d2bced0dcfe9e26 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 23 Mar 2015 14:47:29 +0200 Subject: i915: Use C99 initializers for primitive arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using C99 initializers for the primitive arrays makes things more readable. 
Signed-off-by: Ville Syrjälä Reviewed-by: Ian Romanick Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i915/intel_render.c | 60 ++++++++++++++++---------------- src/mesa/drivers/dri/i915/intel_tris.c | 40 ++++++++++----------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c index 6c2ad6c6c95..409760dc604 100644 --- a/src/mesa/drivers/dri/i915/intel_render.c +++ b/src/mesa/drivers/dri/i915/intel_render.c @@ -67,42 +67,42 @@ #define HAVE_ELTS 0 static const uint32_t hw_prim[GL_POLYGON + 1] = { - 0, - PRIM3D_LINELIST, - PRIM3D_LINESTRIP, - PRIM3D_LINESTRIP, - PRIM3D_TRILIST, - PRIM3D_TRISTRIP, - PRIM3D_TRIFAN, - 0, - 0, - PRIM3D_POLY + [GL_POINTS] = 0, + [GL_LINES ] = PRIM3D_LINELIST, + [GL_LINE_LOOP] = PRIM3D_LINESTRIP, + [GL_LINE_STRIP] = PRIM3D_LINESTRIP, + [GL_TRIANGLES] = PRIM3D_TRILIST, + [GL_TRIANGLE_STRIP] = PRIM3D_TRISTRIP, + [GL_TRIANGLE_FAN] = PRIM3D_TRIFAN, + [GL_QUADS] = 0, + [GL_QUAD_STRIP] = 0, + [GL_POLYGON] = PRIM3D_POLY, }; static const GLenum reduced_prim[GL_POLYGON + 1] = { - GL_POINTS, - GL_LINES, - GL_LINES, - GL_LINES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES + [GL_POINTS] = GL_POINTS, + [GL_LINES] = GL_LINES, + [GL_LINE_LOOP] = GL_LINES, + [GL_LINE_STRIP] = GL_LINES, + [GL_TRIANGLES] = GL_TRIANGLES, + [GL_TRIANGLE_STRIP] = GL_TRIANGLES, + [GL_TRIANGLE_FAN] = GL_TRIANGLES, + [GL_QUADS] = GL_TRIANGLES, + [GL_QUAD_STRIP] = GL_TRIANGLES, + [GL_POLYGON] = GL_TRIANGLES, }; static const int scale_prim[GL_POLYGON + 1] = { - 0, /* fallback case */ - 1, - 2, - 2, - 1, - 3, - 3, - 0, /* fallback case */ - 0, /* fallback case */ - 3 + [GL_POINTS] = 0, /* fallback case */ + [GL_LINES] = 1, + [GL_LINE_LOOP] = 2, + [GL_LINE_STRIP] = 2, + [GL_TRIANGLES] = 1, + [GL_TRIANGLE_STRIP] = 3, + [GL_TRIANGLE_FAN] = 3, + [GL_QUADS] = 0, /* fallback case */ + [GL_QUAD_STRIP] = 0, /* fallback case */ + [GL_POLYGON] = 
3, }; diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index ae62a800fb7..1554613aa4f 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -670,16 +670,16 @@ do { \ ***********************************************************************/ static const GLuint hw_prim[GL_POLYGON + 1] = { - PRIM3D_POINTLIST, - PRIM3D_LINELIST, - PRIM3D_LINELIST, - PRIM3D_LINELIST, - PRIM3D_TRILIST, - PRIM3D_TRILIST, - PRIM3D_TRILIST, - PRIM3D_TRILIST, - PRIM3D_TRILIST, - PRIM3D_TRILIST + [GL_POINTS] = PRIM3D_POINTLIST, + [GL_LINES] = PRIM3D_LINELIST, + [GL_LINE_LOOP] = PRIM3D_LINELIST, + [GL_LINE_STRIP] = PRIM3D_LINELIST, + [GL_TRIANGLES] = PRIM3D_TRILIST, + [GL_TRIANGLE_STRIP] = PRIM3D_TRILIST, + [GL_TRIANGLE_FAN] = PRIM3D_TRILIST, + [GL_QUADS] = PRIM3D_TRILIST, + [GL_QUAD_STRIP] = PRIM3D_TRILIST, + [GL_POLYGON] = PRIM3D_TRILIST, }; #define RASTERIZE(x) intelRasterPrimitive( ctx, x, hw_prim[x] ) @@ -1043,16 +1043,16 @@ intelChooseRenderState(struct gl_context * ctx) } static const GLenum reduced_prim[GL_POLYGON + 1] = { - GL_POINTS, - GL_LINES, - GL_LINES, - GL_LINES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES + [GL_POINTS] = GL_POINTS, + [GL_LINES] = GL_LINES, + [GL_LINE_LOOP] = GL_LINES, + [GL_LINE_STRIP] = GL_LINES, + [GL_TRIANGLES] = GL_TRIANGLES, + [GL_TRIANGLE_STRIP] = GL_TRIANGLES, + [GL_TRIANGLE_FAN] = GL_TRIANGLES, + [GL_QUADS] = GL_TRIANGLES, + [GL_QUAD_STRIP] = GL_TRIANGLES, + [GL_POLYGON] = GL_TRIANGLES }; -- cgit v1.2.3 From 68976a5a009beeb5118a52c31b0e7784202707a3 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 30 Sep 2015 15:22:38 -0700 Subject: i965: Use C99 initializers for primitive arrays Using C99 initializers for the primitive arrays makes things more readable. 
Signed-off-by: Ian Romanick Suggested-by: Matt Turner Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_draw.c | 48 ++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 6a75e067915..c0517e6c0fb 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -55,34 +55,34 @@ #define FILE_DEBUG_FLAG DEBUG_PRIMS static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = { - _3DPRIM_POINTLIST, - _3DPRIM_LINELIST, - _3DPRIM_LINELOOP, - _3DPRIM_LINESTRIP, - _3DPRIM_TRILIST, - _3DPRIM_TRISTRIP, - _3DPRIM_TRIFAN, - _3DPRIM_QUADLIST, - _3DPRIM_QUADSTRIP, - _3DPRIM_POLYGON, - _3DPRIM_LINELIST_ADJ, - _3DPRIM_LINESTRIP_ADJ, - _3DPRIM_TRILIST_ADJ, - _3DPRIM_TRISTRIP_ADJ, + [GL_POINTS] =_3DPRIM_POINTLIST, + [GL_LINES] = _3DPRIM_LINELIST, + [GL_LINE_LOOP] = _3DPRIM_LINELOOP, + [GL_LINE_STRIP] = _3DPRIM_LINESTRIP, + [GL_TRIANGLES] = _3DPRIM_TRILIST, + [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [GL_QUADS] = _3DPRIM_QUADLIST, + [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP, + [GL_POLYGON] = _3DPRIM_POLYGON, + [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, + [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, + [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, + [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, }; static const GLenum reduced_prim[GL_POLYGON+1] = { - GL_POINTS, - GL_LINES, - GL_LINES, - GL_LINES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES + [GL_POINTS] = GL_POINTS, + [GL_LINES] = GL_LINES, + [GL_LINE_LOOP] = GL_LINES, + [GL_LINE_STRIP] = GL_LINES, + [GL_TRIANGLES] = GL_TRIANGLES, + [GL_TRIANGLE_STRIP] = GL_TRIANGLES, + [GL_TRIANGLE_FAN] = GL_TRIANGLES, + [GL_QUADS] = GL_TRIANGLES, + [GL_QUAD_STRIP] = GL_TRIANGLES, + [GL_POLYGON] = GL_TRIANGLES }; uint32_t -- cgit v1.2.3 From 46b13666d8c045acf066f2982cc0a89b6584f09d 
Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 30 Sep 2015 15:23:38 -0700 Subject: radeon: Use C99 initializers for primitive arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using C99 initializers for the primitive arrays makes things more readable. Signed-off-by: Ian Romanick Suggested-by: Matt Turner Reviewed-by: Marek Olšák Reviewed-by: Matt Turner --- src/mesa/drivers/dri/radeon/radeon_swtcl.c | 40 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index b671a3be143..d7a02e90266 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -359,16 +359,16 @@ void r100_swtcl_flush(struct gl_context *ctx, uint32_t current_offset) #define HAVE_ELTS 0 static const GLuint hw_prim[GL_POLYGON+1] = { - RADEON_CP_VC_CNTL_PRIM_TYPE_POINT, - RADEON_CP_VC_CNTL_PRIM_TYPE_LINE, - 0, - RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP, - RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, - RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP, - RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN, - 0, - 0, - 0 + [GL_POINTS] = RADEON_CP_VC_CNTL_PRIM_TYPE_POINT, + [GL_LINES] = RADEON_CP_VC_CNTL_PRIM_TYPE_LINE, + [GL_LINE_LOOP] = 0, + [GL_LINE_STRIP] = RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP, + [GL_TRIANGLES] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, + [GL_TRIANGLE_STRIP] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP, + [GL_TRIANGLE_FAN] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN, + [GL_QUADS] = 0, + [GL_QUAD_STRIP] = 0, + [GL_POLYGON] = 0 }; static inline void @@ -468,16 +468,16 @@ const struct tnl_pipeline_stage _radeon_render_stage = static const GLuint reduced_hw_prim[GL_POLYGON+1] = { - RADEON_CP_VC_CNTL_PRIM_TYPE_POINT, - RADEON_CP_VC_CNTL_PRIM_TYPE_LINE, - RADEON_CP_VC_CNTL_PRIM_TYPE_LINE, - RADEON_CP_VC_CNTL_PRIM_TYPE_LINE, - RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, - RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, - 
RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, - RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, - RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, - RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST + [GL_POINTS] = RADEON_CP_VC_CNTL_PRIM_TYPE_POINT, + [GL_LINES] = RADEON_CP_VC_CNTL_PRIM_TYPE_LINE, + [GL_LINE_LOOP] = RADEON_CP_VC_CNTL_PRIM_TYPE_LINE, + [GL_LINE_STRIP] = RADEON_CP_VC_CNTL_PRIM_TYPE_LINE, + [GL_TRIANGLES] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, + [GL_TRIANGLE_STRIP] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, + [GL_TRIANGLE_FAN] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, + [GL_QUADS] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, + [GL_QUAD_STRIP] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, + [GL_POLYGON] = RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST }; static void radeonRasterPrimitive( struct gl_context *ctx, GLuint hwprim ); -- cgit v1.2.3 From 5ca00e0b8dff529f33c8b1685a9109a78090987c Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 18 Sep 2015 14:56:13 -0400 Subject: t_dd_dmatmp: Replace fprintf with unreachable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From http://lists.freedesktop.org/archives/mesa-dev/2015-May/084883.html: "There are no real error cases here, just dead code. validate_render() is supposed to make sure we never call these functions if the code can't actually render the primitives. The fprintf()+return branches should really just contain assert(0) or equivalent." I also rearranged the if-else-block in render_quad_strip_verts to look more like the other functions. A future patch is going to change a bunch of that code anyway. v2: Make "unreachable" message more descriptive. Suggested by Iago. 
Signed-off-by: Ian Romanick Suggested-by: Ville Syrjälä Reviewed-by: Iago Toral Quiroga --- src/mesa/tnl_dd/t_dd_dmatmp.h | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h index e7e19a03597..181cf6701dd 100644 --- a/src/mesa/tnl_dd/t_dd_dmatmp.h +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h @@ -85,8 +85,8 @@ static void TAG(render_points_verts)(struct gl_context *ctx, currentsz = dmasz; } } else { - fprintf(stderr, "%s - cannot draw primitive\n", __func__); - return; + unreachable("Cannot draw primitive; validate_render should have " + "prevented this"); } } @@ -319,8 +319,8 @@ static void TAG(render_poly_verts)(struct gl_context *ctx, } else if (ctx->Light.ShadeModel == GL_SMOOTH) { TAG(render_tri_fan_verts)( ctx, start, count, flags ); } else { - fprintf(stderr, "%s - cannot draw primitive\n", __func__); - return; + unreachable("Cannot draw primitive; validate_render should have " + "prevented this"); } } @@ -331,14 +331,8 @@ static void TAG(render_quad_strip_verts)(struct gl_context *ctx, { GLuint j, nr; - if (ctx->Light.ShadeModel == GL_FLAT && - TNL_CONTEXT(ctx)->vb.AttribPtr[_TNL_ATTRIB_COLOR0]->stride) { - /* Vertices won't fit in a single buffer or elts not available - should - * never happen. 
- */ - fprintf(stderr, "%s - cannot draw primitive\n", __func__); - return; - } else { + if (ctx->Light.ShadeModel != GL_FLAT || + !TNL_CONTEXT(ctx)->vb.AttribPtr[_TNL_ATTRIB_COLOR0]->stride) { LOCAL_VARS; const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() & ~1; unsigned currentsz; @@ -364,6 +358,9 @@ static void TAG(render_quad_strip_verts)(struct gl_context *ctx, } FLUSH(); + } else { + unreachable("Cannot draw primitive; validate_render should have " + "prevented this"); } } -- cgit v1.2.3 From b15b4581d15684d59e1fc4d7cad72ece4dd9fcb0 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 23 Mar 2015 14:47:18 +0200 Subject: t_dd_dmatmp: Allow flat shaded polygons with tri fans MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can allow rendering flat shaded polygons using tri fans if we check the provoking vertex convention. v2 (idr): Remove _EXT suffixes from GL_FIRST_VERTEX_CONVENTION. Signed-off-by: Ville Syrjälä Reviewed-by: Ian Romanick --- src/mesa/tnl_dd/t_dd_dmatmp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h index 181cf6701dd..7d03b026bd5 100644 --- a/src/mesa/tnl_dd/t_dd_dmatmp.h +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h @@ -316,7 +316,8 @@ static void TAG(render_poly_verts)(struct gl_context *ctx, } FLUSH(); - } else if (ctx->Light.ShadeModel == GL_SMOOTH) { + } else if (ctx->Light.ShadeModel == GL_SMOOTH || + ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) { TAG(render_tri_fan_verts)( ctx, start, count, flags ); } else { unreachable("Cannot draw primitive; validate_render should have " @@ -458,7 +459,8 @@ static bool TAG(validate_render)(struct gl_context *ctx, ok = true; break; case GL_POLYGON: - ok = (HAVE_POLYGONS) || ctx->Light.ShadeModel == GL_SMOOTH; + ok = (HAVE_POLYGONS) || ctx->Light.ShadeModel == GL_SMOOTH || + ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION; break; case GL_QUAD_STRIP: ok = 
VB->Elts || -- cgit v1.2.3 From 83d511e1904b565490e6c8335a1d329e0fcf9c41 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 23 Mar 2015 14:47:19 +0200 Subject: t_dd_dmatmp: Disallow flat shading when rendering quad strips via tri strips MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When rendering quad strips via tri strips we can't get the provoking vertex right, so disallow flat shading. v2: Major rebase on top of Ian's other t_dd_dmatmp.h work. Signed-off-by: Ville Syrjälä Signed-off-by: Ian Romanick --- src/mesa/tnl_dd/t_dd_dmatmp.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h index 7d03b026bd5..04182f02059 100644 --- a/src/mesa/tnl_dd/t_dd_dmatmp.h +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h @@ -332,8 +332,7 @@ static void TAG(render_quad_strip_verts)(struct gl_context *ctx, { GLuint j, nr; - if (ctx->Light.ShadeModel != GL_FLAT || - !TNL_CONTEXT(ctx)->vb.AttribPtr[_TNL_ATTRIB_COLOR0]->stride) { + if (ctx->Light.ShadeModel == GL_SMOOTH) { LOCAL_VARS; const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() & ~1; unsigned currentsz; @@ -463,9 +462,7 @@ static bool TAG(validate_render)(struct gl_context *ctx, ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION; break; case GL_QUAD_STRIP: - ok = VB->Elts || - (ctx->Light.ShadeModel != GL_FLAT || - VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride == 0); + ok = VB->Elts || ctx->Light.ShadeModel == GL_SMOOTH; break; case GL_QUADS: ok = true; /* flatshading is ok. */ -- cgit v1.2.3 From 08864265039437260beffcdc3471e5788d142e86 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 23 Mar 2015 14:47:20 +0200 Subject: t_dd_dmatmp: Check provoking vertex convention when rendering quads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When drawing quads using triangles we need to be careful to make the provoking vertices match when flat shading. 
v2: Major rebase on top of Ian's other t_dd_dmatmp.h work. Signed-off-by: Ville Syrjälä Signed-off-by: Ian Romanick --- src/mesa/tnl_dd/t_dd_dmatmp.h | 46 ++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h index 04182f02059..56fa1a382f7 100644 --- a/src/mesa/tnl_dd/t_dd_dmatmp.h +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h @@ -370,28 +370,33 @@ static void TAG(render_quads_verts)(struct gl_context *ctx, GLuint count, GLuint flags) { - LOCAL_VARS; - GLuint j; - - /* Emit whole number of quads in total. */ - count -= count & 3; + if (ctx->Light.ShadeModel == GL_SMOOTH || + ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) { + LOCAL_VARS; + GLuint j; - /* Hardware doesn't have a quad primitive type -- try to simulate it using - * triangle primitive. This is a win for gears, but is it useful in the - * broader world? - */ - INIT(GL_TRIANGLES); + /* Emit whole number of quads in total. */ + count -= count & 3; - for (j = 0; j + 3 < count; j += 4) { - void *tmp = ALLOC_VERTS(6); - /* Send v0, v1, v3 + /* Hardware doesn't have a quad primitive type -- try to simulate it using + * triangle primitive. This is a win for gears, but is it useful in the + * broader world? 
*/ - tmp = EMIT_VERTS(ctx, start + j, 2, tmp); - tmp = EMIT_VERTS(ctx, start + j + 3, 1, tmp); - /* Send v1, v2, v3 - */ - tmp = EMIT_VERTS(ctx, start + j + 1, 3, tmp); - (void) tmp; + INIT(GL_TRIANGLES); + + for (j = 0; j + 3 < count; j += 4) { + void *tmp = ALLOC_VERTS(6); + /* Send v0, v1, v3 + */ + tmp = EMIT_VERTS(ctx, start + j, 2, tmp); + tmp = EMIT_VERTS(ctx, start + j + 3, 1, tmp); + /* Send v1, v2, v3 + */ + tmp = EMIT_VERTS(ctx, start + j + 1, 3, tmp); + (void) tmp; + } + } else { + unreachable("Cannot draw primitive"); } } @@ -465,7 +470,8 @@ static bool TAG(validate_render)(struct gl_context *ctx, ok = VB->Elts || ctx->Light.ShadeModel == GL_SMOOTH; break; case GL_QUADS: - ok = true; /* flatshading is ok. */ + ok = ctx->Light.ShadeModel == GL_SMOOTH || + ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION; break; default: break; -- cgit v1.2.3 From 303895655c6ed837e3d867a450f4322404e86a88 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 23 Mar 2015 14:47:25 +0200 Subject: i915: Handle provoking vertex in intelFastRenderClippedPoly() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intelFastRenderClippedPoly() renders the polygon using triangles. For polygons the provoking vertex is always the first one, and currently this function assumes that the provoking vertex for triangles is the last one. In case the user changed the provoking vertex convention, the hardware may be configured to treat the first vertex of triangles as the provoking vertex. So check the convention and emit the triangles in the appropriate order to avoid having to change the hardware provoking vertex convention for rendering polygons. 
Signed-off-by: Ville Syrjälä Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i915/intel_tris.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index 1554613aa4f..6133c23919d 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -928,10 +928,18 @@ intelFastRenderClippedPoly(struct gl_context * ctx, const GLuint * elts, GLuint const GLuint *start = (const GLuint *) V(elts[0]); int i, j; - for (i = 2; i < n; i++) { - COPY_DWORDS(j, vb, vertsize, V(elts[i - 1])); - COPY_DWORDS(j, vb, vertsize, V(elts[i])); - COPY_DWORDS(j, vb, vertsize, start); + if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) { + for (i = 2; i < n; i++) { + COPY_DWORDS(j, vb, vertsize, V(elts[i - 1])); + COPY_DWORDS(j, vb, vertsize, V(elts[i])); + COPY_DWORDS(j, vb, vertsize, start); + } + } else { + for (i = 2; i < n; i++) { + COPY_DWORDS(j, vb, vertsize, start); + COPY_DWORDS(j, vb, vertsize, V(elts[i - 1])); + COPY_DWORDS(j, vb, vertsize, V(elts[i])); + } } } -- cgit v1.2.3 From bcf650496f22961fb66975aba3c8a982dbc72c9e Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 23 Mar 2015 14:47:27 +0200 Subject: i915: Use _tnl_RenderClippedPolygon and _tnl_RenderClippedLine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _tnl_RenderClippedPolygon and _tnl_RenderClippedLine already do most of what we want so use them. 
Signed-off-by: Ville Syrjälä Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i915/intel_tris.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index 6133c23919d..6407ea96f32 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -891,18 +891,11 @@ intelRenderClippedPoly(struct gl_context * ctx, const GLuint * elts, GLuint n) { struct intel_context *intel = intel_context(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; GLuint prim = intel->render_primitive; /* Render the new vertices as an unclipped polygon. */ - { - GLuint *tmp = VB->Elts; - VB->Elts = (GLuint *) elts; - tnl->Driver.Render.PrimTabElts[GL_POLYGON] (ctx, 0, n, - PRIM_BEGIN | PRIM_END); - VB->Elts = tmp; - } + _tnl_RenderClippedPolygon(ctx, elts, n); /* Restore the render primitive */ @@ -910,14 +903,6 @@ intelRenderClippedPoly(struct gl_context * ctx, const GLuint * elts, GLuint n) tnl->Driver.Render.PrimitiveNotify(ctx, prim); } -static void -intelRenderClippedLine(struct gl_context * ctx, GLuint ii, GLuint jj) -{ - TNLcontext *tnl = TNL_CONTEXT(ctx); - - tnl->Driver.Render.Line(ctx, ii, jj); -} - static void intelFastRenderClippedPoly(struct gl_context * ctx, const GLuint * elts, GLuint n) { @@ -1044,7 +1029,7 @@ intelChooseRenderState(struct gl_context * ctx) else { tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts; tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts; - tnl->Driver.Render.ClippedLine = intelRenderClippedLine; + tnl->Driver.Render.ClippedLine = _tnl_RenderClippedLine; tnl->Driver.Render.ClippedPolygon = intelRenderClippedPoly; } } -- cgit v1.2.3 From 0febd0ecfd1e2a36381ab7793811b9c7891ed82f Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 23 Mar 2015 14:47:30 +0200 Subject: i915: Use COPY_DWORDS for points MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit The sub-pixel adjustment for points was killed off in commit 60d762aa625095a8c1f9597d8530bb5a6fa61b4c Author: Xiang, Haihao Date: Wed Jan 2 11:38:51 2008 +0800 i915: Needn't adjust pixel centers. fix #12944 so we can just as well use COPY_DWORDS(). Signed-off-by: Ville Syrjälä Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i915/intel_tris.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index 6407ea96f32..a093a0a7bd2 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -426,11 +426,7 @@ intel_draw_point(struct intel_context *intel, intelVertexPtr v0) GLuint *vb = intel_get_prim_space(intel, 1); int j; - /* Adjust for sub pixel position -- still required for conform. */ - *(float *) &vb[0] = v0->v.x; - *(float *) &vb[1] = v0->v.y; - for (j = 2; j < vertsize; j++) - vb[j] = v0->ui[j]; + COPY_DWORDS(j, vb, vertsize, v0); } -- cgit v1.2.3 From 00ee403883abedb966550d6ab50a1c1f6613175f Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 23 Mar 2015 14:47:31 +0200 Subject: i915: Enable intel_render path for points MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sub-pixel adjustment for points was killed off in commit 60d762aa625095a8c1f9597d8530bb5a6fa61b4c Author: Xiang, Haihao Date: Wed Jan 2 11:38:51 2008 +0800 i915: Needn't adjust pixel centers. fix #12944 so if we don't need it in intel_tris.c we don't need it in intel_render.c either, which means we can allow intel_render.c to render points. No apparent regressions on PNV in ES1 or ES2 conformance. 
Signed-off-by: Ville Syrjälä Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i915/intel_render.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c index 409760dc604..c1603565cc2 100644 --- a/src/mesa/drivers/dri/i915/intel_render.c +++ b/src/mesa/drivers/dri/i915/intel_render.c @@ -54,9 +54,7 @@ * dma buffers. Use strip/fan hardware primitives where possible. * Try to simulate missing primitives with indexed vertices. */ -#define HAVE_POINTS 0 /* Has it, but can't use because subpixel has to - * be adjusted for points on the INTEL/I845G - */ +#define HAVE_POINTS 1 #define HAVE_LINES 1 #define HAVE_LINE_STRIPS 1 #define HAVE_TRIANGLES 1 @@ -67,7 +65,7 @@ #define HAVE_ELTS 0 static const uint32_t hw_prim[GL_POLYGON + 1] = { - [GL_POINTS] = 0, + [GL_POINTS] = PRIM3D_POINTLIST, [GL_LINES ] = PRIM3D_LINELIST, [GL_LINE_LOOP] = PRIM3D_LINESTRIP, [GL_LINE_STRIP] = PRIM3D_LINESTRIP, @@ -93,7 +91,7 @@ static const GLenum reduced_prim[GL_POLYGON + 1] = { }; static const int scale_prim[GL_POLYGON + 1] = { - [GL_POINTS] = 0, /* fallback case */ + [GL_POINTS] = 1, [GL_LINES] = 1, [GL_LINE_LOOP] = 2, [GL_LINE_STRIP] = 2, -- cgit v1.2.3 From 3e2c7ca7731362b9f8f872832aeed2f89e70e11c Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 23 Mar 2015 14:47:32 +0200 Subject: i915: Adjust line size limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardware can draw lines 0.5 to 7.5 pixels wide. Adjust the limits to 1.0-7.0. The old limits seem to be from the era when i915 and i965 were sharing this code. Not really sure if 1.0-7.0 is correct. Maybe it could be 0.5-7.5 as those are the hw limits, or maybe some combination of the two? 
Signed-off-by: Ville Syrjälä Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i915/intel_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c index c780103228f..6c737ea6877 100644 --- a/src/mesa/drivers/dri/i915/intel_context.c +++ b/src/mesa/drivers/dri/i915/intel_context.c @@ -474,8 +474,8 @@ intelInitContext(struct intel_context *intel, ctx->Const.MinLineWidth = 1.0; ctx->Const.MinLineWidthAA = 1.0; - ctx->Const.MaxLineWidth = 5.0; - ctx->Const.MaxLineWidthAA = 5.0; + ctx->Const.MaxLineWidth = 7.0; + ctx->Const.MaxLineWidthAA = 7.0; ctx->Const.LineWidthGranularity = 0.5; ctx->Const.MinPointSize = 1.0; -- cgit v1.2.3 From 021f15816e74ec012b8cb904d9f0bc05ff5885b0 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 23 Mar 2015 14:47:35 +0200 Subject: i830: Fix culling with user fbos on gen2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flip the cull bits when rendering to a user fbo on gen2. This was already done on gen3 (since before git history starts) but was missing from the gen2 code. Fixes rendering of the driver+kart model in supertuxkart kart selection screen. 
Signed-off-by: Ville Syrjälä Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i915/i830_state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i915/i830_state.c b/src/mesa/drivers/dri/i915/i830_state.c index 906e942b020..7ce5ef7e41a 100644 --- a/src/mesa/drivers/dri/i915/i830_state.c +++ b/src/mesa/drivers/dri/i915/i830_state.c @@ -601,6 +601,8 @@ i830CullFaceFrontFace(struct gl_context * ctx, GLenum unused) else if (ctx->Polygon.CullFaceMode != GL_FRONT_AND_BACK) { mode = CULLMODE_CW; + if (ctx->DrawBuffer && _mesa_is_user_fbo(ctx->DrawBuffer)) + mode ^= (CULLMODE_CW ^ CULLMODE_CCW); if (ctx->Polygon.CullFaceMode == GL_FRONT) mode ^= (CULLMODE_CW ^ CULLMODE_CCW); if (ctx->Polygon.FrontFace != GL_CCW) -- cgit v1.2.3 From ed7f00f564fc2175693560c0697d318cf7b4a13c Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 6 Oct 2015 10:55:03 -0700 Subject: i965: Don't override NewFramebuffer just to call _mesa_new_framebuffer Signed-off-by: Ian Romanick Reviewed-by: Brian Paul --- src/mesa/drivers/dri/i965/intel_buffers.h | 2 -- src/mesa/drivers/dri/i965/intel_fbo.c | 14 -------------- 2 files changed, 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_buffers.h b/src/mesa/drivers/dri/i965/intel_buffers.h index 85f54b2c653..016039a628d 100644 --- a/src/mesa/drivers/dri/i965/intel_buffers.h +++ b/src/mesa/drivers/dri/i965/intel_buffers.h @@ -30,8 +30,6 @@ #include "drm.h" #include "brw_context.h" -struct intel_framebuffer; - extern void intelInitBufferFuncs(struct dd_function_table *functions); bool brw_is_front_buffer_reading(struct gl_framebuffer *fb); diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index 6b2349e8b69..5a6b0dd1ec5 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -52,19 +52,6 @@ #define FILE_DEBUG_FLAG DEBUG_FBO -/** - * Create a new framebuffer object. 
- */ -static struct gl_framebuffer * -intel_new_framebuffer(struct gl_context * ctx, GLuint name) -{ - /* Only drawable state in intel_framebuffer at this time, just use Mesa's - * class - */ - return _mesa_new_framebuffer(ctx, name); -} - - /** Called by gl_renderbuffer::Delete() */ static void intel_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb) @@ -1093,7 +1080,6 @@ void intel_fbo_init(struct brw_context *brw) { struct dd_function_table *dd = &brw->ctx.Driver; - dd->NewFramebuffer = intel_new_framebuffer; dd->NewRenderbuffer = intel_new_renderbuffer; dd->MapRenderbuffer = intel_map_renderbuffer; dd->UnmapRenderbuffer = intel_unmap_renderbuffer; -- cgit v1.2.3 From e32a6590a43fa9ff792639e44555019077d461ba Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 6 Oct 2015 10:55:03 -0700 Subject: i915: Don't override NewFramebuffer just to call _mesa_new_framebuffer Signed-off-by: Ian Romanick Reviewed-by: Brian Paul --- src/mesa/drivers/dri/i915/intel_buffers.h | 1 - src/mesa/drivers/dri/i915/intel_fbo.c | 14 -------------- 2 files changed, 15 deletions(-) diff --git a/src/mesa/drivers/dri/i915/intel_buffers.h b/src/mesa/drivers/dri/i915/intel_buffers.h index 42d84bcfcb7..6e22f5125b9 100644 --- a/src/mesa/drivers/dri/i915/intel_buffers.h +++ b/src/mesa/drivers/dri/i915/intel_buffers.h @@ -34,7 +34,6 @@ #include "intel_context.h" struct intel_context; -struct intel_framebuffer; extern void intel_check_front_buffer_rendering(struct intel_context *intel); diff --git a/src/mesa/drivers/dri/i915/intel_fbo.c b/src/mesa/drivers/dri/i915/intel_fbo.c index 12cc7e3a71b..8750c601b84 100644 --- a/src/mesa/drivers/dri/i915/intel_fbo.c +++ b/src/mesa/drivers/dri/i915/intel_fbo.c @@ -64,19 +64,6 @@ intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex) return NULL; } -/** - * Create a new framebuffer object. 
- */ -static struct gl_framebuffer * -intel_new_framebuffer(struct gl_context * ctx, GLuint name) -{ - /* Only drawable state in intel_framebuffer at this time, just use Mesa's - * class - */ - return _mesa_new_framebuffer(ctx, name); -} - - /** Called by gl_renderbuffer::Delete() */ static void intel_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb) @@ -770,7 +757,6 @@ intel_blit_framebuffer(struct gl_context *ctx, void intel_fbo_init(struct intel_context *intel) { - intel->ctx.Driver.NewFramebuffer = intel_new_framebuffer; intel->ctx.Driver.NewRenderbuffer = intel_new_renderbuffer; intel->ctx.Driver.MapRenderbuffer = intel_map_renderbuffer; intel->ctx.Driver.UnmapRenderbuffer = intel_unmap_renderbuffer; -- cgit v1.2.3 From df75babf74d4174f334708e00eaf5399420bc9a1 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 6 Oct 2015 10:55:03 -0700 Subject: radeon: Don't override NewFramebuffer just to call _mesa_new_framebuffer Signed-off-by: Ian Romanick Reviewed-by: Brian Paul Reviewed-by: Alex Deucher --- src/mesa/drivers/dri/radeon/radeon_fbo.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index 5eece518c95..4d75d149b27 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -46,12 +46,6 @@ printf(__VA_ARGS__); \ } while(0) -static struct gl_framebuffer * -radeon_new_framebuffer(struct gl_context *ctx, GLuint name) -{ - return _mesa_new_framebuffer(ctx, name); -} - static void radeon_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb) { @@ -868,7 +862,6 @@ radeon_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) void radeon_fbo_init(struct radeon_context *radeon) { - radeon->glCtx.Driver.NewFramebuffer = radeon_new_framebuffer; radeon->glCtx.Driver.NewRenderbuffer = radeon_new_renderbuffer; radeon->glCtx.Driver.MapRenderbuffer = radeon_map_renderbuffer; 
radeon->glCtx.Driver.UnmapRenderbuffer = radeon_unmap_renderbuffer; -- cgit v1.2.3 From 5c4ef9f1d2ae6a1824119aa246eaea727e294e3a Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 6 Oct 2015 10:55:40 -0700 Subject: st/mesa: Don't override NewFramebuffer just to call _mesa_new_framebuffer v2: Since state_tracker does not call _mesa_init_driver_functions, we need to initialize the dd::NewFramebuffer pointer to _mesa_new_framebuffer here. Suggested by Brian. Signed-off-by: Ian Romanick Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_cb_fbo.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 9d06a232bfa..ff703fa41cb 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -245,17 +245,6 @@ st_renderbuffer_delete(struct gl_context *ctx, struct gl_renderbuffer *rb) } -/** - * Called via ctx->Driver.NewFramebuffer() - */ -static struct gl_framebuffer * -st_new_framebuffer(struct gl_context *ctx, GLuint name) -{ - /* XXX not sure we need to subclass gl_framebuffer for pipe */ - return _mesa_new_framebuffer(ctx, name); -} - - /** * Called via ctx->Driver.NewRenderbuffer() */ @@ -826,7 +815,7 @@ st_UnmapRenderbuffer(struct gl_context *ctx, void st_init_fbo_functions(struct dd_function_table *functions) { - functions->NewFramebuffer = st_new_framebuffer; + functions->NewFramebuffer = _mesa_new_framebuffer; functions->NewRenderbuffer = st_new_renderbuffer; functions->FramebufferRenderbuffer = _mesa_FramebufferRenderbuffer_sw; functions->RenderTexture = st_render_texture; -- cgit v1.2.3 From ea8b77e892cdf6aa4cdd8a9ff312c422b3509ae7 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 17 Sep 2015 10:26:45 -0400 Subject: mesa/i965: Refactor brw_is_front_buffer_{drawing,reading} to common code There are multiple similar implementations of these functions, and a later patch was going to add another. 
v2: Move removing intel_framebuffer to a different patch. Signed-off-by: Ian Romanick Reviewed-by: Iago Toral Quiroga --- src/mesa/drivers/dri/i965/brw_context.c | 15 ++++++++------- src/mesa/drivers/dri/i965/brw_draw.c | 3 ++- src/mesa/drivers/dri/i965/intel_buffers.c | 24 ++---------------------- src/mesa/drivers/dri/i965/intel_buffers.h | 3 --- src/mesa/main/framebuffer.c | 19 +++++++++++++++++++ src/mesa/main/framebuffer.h | 6 ++++++ 6 files changed, 37 insertions(+), 33 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 87c84c6236b..6b2bbd21703 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -41,6 +41,7 @@ #include "main/version.h" #include "main/vtxfmt.h" #include "main/texobj.h" +#include "main/framebuffer.h" #include "vbo/vbo_context.h" @@ -1298,7 +1299,7 @@ intel_prepare_render(struct brw_context *brw) * that will happen next will probably dirty the front buffer. So * mark it as dirty here. 
*/ - if (brw_is_front_buffer_drawing(ctx->DrawBuffer)) + if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) brw->front_buffer_dirty = true; } @@ -1337,8 +1338,8 @@ intel_query_dri2_buffers(struct brw_context *brw, back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); memset(attachments, 0, sizeof(attachments)); - if ((brw_is_front_buffer_drawing(fb) || - brw_is_front_buffer_reading(fb) || + if ((_mesa_is_front_buffer_drawing(fb) || + _mesa_is_front_buffer_reading(fb) || !back_rb) && front_rb) { /* If a fake front buffer is in use, then querying for * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from @@ -1452,7 +1453,7 @@ intel_process_dri2_buffer(struct brw_context *brw, drawable->w, drawable->h, buffer->pitch); - if (brw_is_front_buffer_drawing(fb) && + if (_mesa_is_front_buffer_drawing(fb) && (buffer->attachment == __DRI_BUFFER_FRONT_LEFT || buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) && rb->Base.Base.NumSamples > 1) { @@ -1510,7 +1511,7 @@ intel_update_image_buffer(struct brw_context *intel, buffer->width, buffer->height, buffer->pitch); - if (brw_is_front_buffer_drawing(fb) && + if (_mesa_is_front_buffer_drawing(fb) && buffer_type == __DRI_IMAGE_BUFFER_FRONT && rb->Base.Base.NumSamples > 1) { intel_renderbuffer_upsample(intel, rb); @@ -1538,8 +1539,8 @@ intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable) else return; - if (front_rb && (brw_is_front_buffer_drawing(fb) || - brw_is_front_buffer_reading(fb) || !back_rb)) { + if (front_rb && (_mesa_is_front_buffer_drawing(fb) || + _mesa_is_front_buffer_reading(fb) || !back_rb)) { buffer_mask |= __DRI_IMAGE_BUFFER_FRONT; } diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index c0517e6c0fb..39a26b05201 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -33,6 +33,7 @@ #include "main/enums.h" #include "main/macros.h" #include "main/transformfeedback.h" +#include "main/framebuffer.h" 
#include "tnl/tnl.h" #include "vbo/vbo_context.h" #include "swrast/swrast.h" @@ -364,7 +365,7 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL); struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH]; - if (brw_is_front_buffer_drawing(fb)) + if (_mesa_is_front_buffer_drawing(fb)) front_irb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); if (front_irb) diff --git a/src/mesa/drivers/dri/i965/intel_buffers.c b/src/mesa/drivers/dri/i965/intel_buffers.c index c98e19382c3..fd522cc4f4d 100644 --- a/src/mesa/drivers/dri/i965/intel_buffers.c +++ b/src/mesa/drivers/dri/i965/intel_buffers.c @@ -32,30 +32,10 @@ #include "main/framebuffer.h" #include "main/renderbuffer.h" - -bool -brw_is_front_buffer_reading(struct gl_framebuffer *fb) -{ - if (!fb || _mesa_is_user_fbo(fb)) - return false; - - return fb->_ColorReadBufferIndex == BUFFER_FRONT_LEFT; -} - -bool -brw_is_front_buffer_drawing(struct gl_framebuffer *fb) -{ - if (!fb || _mesa_is_user_fbo(fb)) - return false; - - return (fb->_NumColorDrawBuffers >= 1 && - fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT); -} - static void intelDrawBuffer(struct gl_context * ctx, GLenum mode) { - if (brw_is_front_buffer_drawing(ctx->DrawBuffer)) { + if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) { struct brw_context *const brw = brw_context(ctx); /* If we might be front-buffer rendering on this buffer for the first @@ -71,7 +51,7 @@ intelDrawBuffer(struct gl_context * ctx, GLenum mode) static void intelReadBuffer(struct gl_context * ctx, GLenum mode) { - if (brw_is_front_buffer_reading(ctx->ReadBuffer)) { + if (_mesa_is_front_buffer_reading(ctx->ReadBuffer)) { struct brw_context *const brw = brw_context(ctx); /* If we might be front-buffer reading on this buffer for the first diff --git a/src/mesa/drivers/dri/i965/intel_buffers.h b/src/mesa/drivers/dri/i965/intel_buffers.h index 016039a628d..0e0d9c31f2b 100644 
--- a/src/mesa/drivers/dri/i965/intel_buffers.h +++ b/src/mesa/drivers/dri/i965/intel_buffers.h @@ -32,7 +32,4 @@ extern void intelInitBufferFuncs(struct dd_function_table *functions); -bool brw_is_front_buffer_reading(struct gl_framebuffer *fb); -bool brw_is_front_buffer_drawing(struct gl_framebuffer *fb); - #endif /* INTEL_BUFFERS_H */ diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 5b6b3f64581..d18166d528e 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -964,3 +964,22 @@ _mesa_print_framebuffer(const struct gl_framebuffer *fb) } } } + +bool +_mesa_is_front_buffer_reading(const struct gl_framebuffer *fb) +{ + if (!fb || _mesa_is_user_fbo(fb)) + return false; + + return fb->_ColorReadBufferIndex == BUFFER_FRONT_LEFT; +} + +bool +_mesa_is_front_buffer_drawing(const struct gl_framebuffer *fb) +{ + if (!fb || _mesa_is_user_fbo(fb)) + return false; + + return (fb->_NumColorDrawBuffers >= 1 && + fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT); +} diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h index 08e43222045..bfc8a0836e7 100644 --- a/src/mesa/main/framebuffer.h +++ b/src/mesa/main/framebuffer.h @@ -139,4 +139,10 @@ _mesa_get_read_renderbuffer_for_format(const struct gl_context *ctx, extern void _mesa_print_framebuffer(const struct gl_framebuffer *fb); +extern bool +_mesa_is_front_buffer_reading(const struct gl_framebuffer *fb); + +extern bool +_mesa_is_front_buffer_drawing(const struct gl_framebuffer *fb); + #endif /* FRAMEBUFFER_H */ -- cgit v1.2.3 From 3bcc780126ec3a479429a42befa27f141ebf8d48 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 23 Mar 2015 14:47:34 +0200 Subject: i915: Drop broken front_buffer_reading/drawing optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bring the following commit over to i915: commit ec542d74578bbef6b55125dd6aba1dc7f5079e65 Author: Eric Anholt Date: Mon Mar 3 10:43:10 2014 -0800 i965: 
Drop broken front_buffer_reading/drawing optimization. Not sure if it might fix anything, but since the i965 and i915 used to share a bunch of that code, it would seem reasonable the same problems could be present in the i915 code still, and the i965 approach is well tested by now so bringing it over seems fairly safe. No piglit regressions on 855. v2: Rebase on _mesa_is_front_buffer_* refactor. Signed-off-by: Ville Syrjälä Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i915/intel_buffers.c | 28 ++++++++-------------------- src/mesa/drivers/dri/i915/intel_context.c | 12 +++++++----- src/mesa/drivers/dri/i915/intel_context.h | 16 ---------------- 3 files changed, 15 insertions(+), 41 deletions(-) diff --git a/src/mesa/drivers/dri/i915/intel_buffers.c b/src/mesa/drivers/dri/i915/intel_buffers.c index 51eaea43a50..386e032443a 100644 --- a/src/mesa/drivers/dri/i915/intel_buffers.c +++ b/src/mesa/drivers/dri/i915/intel_buffers.c @@ -55,20 +55,14 @@ intel_check_front_buffer_rendering(struct intel_context *intel) static void intelDrawBuffer(struct gl_context * ctx, GLenum mode) { - if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer)) { + if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) { struct intel_context *const intel = intel_context(ctx); - const bool was_front_buffer_rendering = - intel->is_front_buffer_rendering; - intel->is_front_buffer_rendering = (mode == GL_FRONT_LEFT) - || (mode == GL_FRONT) || (mode == GL_FRONT_AND_BACK); - - /* If we weren't front-buffer rendering before but we are now, - * invalidate our DRI drawable so we'll ask for new buffers + /* If we might be front-buffer rendering on this buffer for the first + * time, invalidate our DRI drawable so we'll ask for new buffers * (including the fake front) before we start rendering again. 
*/ - if (!was_front_buffer_rendering && intel->is_front_buffer_rendering) - dri2InvalidateDrawable(intel->driContext->driDrawablePriv); + dri2InvalidateDrawable(intel->driContext->driDrawablePriv); } intel_draw_buffer(ctx); @@ -78,20 +72,14 @@ intelDrawBuffer(struct gl_context * ctx, GLenum mode) static void intelReadBuffer(struct gl_context * ctx, GLenum mode) { - if (ctx->ReadBuffer && _mesa_is_winsys_fbo(ctx->ReadBuffer)) { + if (_mesa_is_front_buffer_reading(ctx->ReadBuffer)) { struct intel_context *const intel = intel_context(ctx); - const bool was_front_buffer_reading = - intel->is_front_buffer_reading; - - intel->is_front_buffer_reading = (mode == GL_FRONT_LEFT) - || (mode == GL_FRONT); - /* If we weren't front-buffer reading before but we are now, - * invalidate our DRI drawable so we'll ask for new buffers + /* If we might be front-buffer reading on this buffer for the first + * time, invalidate our DRI drawable so we'll ask for new buffers * (including the fake front) before we start reading again. */ - if (!was_front_buffer_reading && intel->is_front_buffer_reading) - dri2InvalidateDrawable(intel->driContext->driReadablePriv); + dri2InvalidateDrawable(intel->driContext->driReadablePriv); } } diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c index 6c737ea6877..644bede9d47 100644 --- a/src/mesa/drivers/dri/i915/intel_context.c +++ b/src/mesa/drivers/dri/i915/intel_context.c @@ -243,7 +243,7 @@ intel_prepare_render(struct intel_context *intel) * that will happen next will probably dirty the front buffer. So * mark it as dirty here. 
*/ - if (intel->is_front_buffer_rendering) + if (_mesa_is_front_buffer_drawing(intel->ctx.DrawBuffer)) intel->front_buffer_dirty = true; /* Wait for the swapbuffers before the one we just emitted, so we @@ -356,7 +356,7 @@ intel_glFlush(struct gl_context *ctx) intel_flush(ctx); intel_flush_front(ctx); - if (intel->is_front_buffer_rendering) + if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) intel->need_throttle = true; } @@ -700,8 +700,8 @@ intel_query_dri2_buffers(struct intel_context *intel, back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); memset(attachments, 0, sizeof(attachments)); - if ((intel->is_front_buffer_rendering || - intel->is_front_buffer_reading || + if ((_mesa_is_front_buffer_drawing(fb) || + _mesa_is_front_buffer_reading(fb) || !back_rb) && front_rb) { /* If a fake front buffer is in use, then querying for * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from @@ -866,8 +866,10 @@ intel_update_image_buffers(struct intel_context *intel, __DRIdrawable *drawable) else return; - if ((intel->is_front_buffer_rendering || intel->is_front_buffer_reading || !back_rb) && front_rb) + if (front_rb && (_mesa_is_front_buffer_drawing(fb) || + _mesa_is_front_buffer_reading(fb) || !back_rb)) { buffer_mask |= __DRI_IMAGE_BUFFER_FRONT; + } if (back_rb) buffer_mask |= __DRI_IMAGE_BUFFER_BACK; diff --git a/src/mesa/drivers/dri/i915/intel_context.h b/src/mesa/drivers/dri/i915/intel_context.h index 4ec4015d453..aecd7c23f45 100644 --- a/src/mesa/drivers/dri/i915/intel_context.h +++ b/src/mesa/drivers/dri/i915/intel_context.h @@ -255,22 +255,6 @@ struct intel_context */ bool front_buffer_dirty; - /** - * Track whether front-buffer rendering is currently enabled - * - * A separate flag is used to track this in order to support MRT more - * easily. - */ - bool is_front_buffer_rendering; - /** - * Track whether front-buffer is the current read target. - * - * This is closely associated with is_front_buffer_rendering, but may - * be set separately. 
The DRI2 fake front buffer must be referenced - * either way. - */ - bool is_front_buffer_reading; - bool use_early_z; __DRIcontext *driContext; -- cgit v1.2.3 From 83f9f911b2c55a9ca1a27fd665c10913e6dd6291 Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Fri, 10 Apr 2015 13:41:16 -0700 Subject: i915: remove unneeded #include of colormac.h Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i915/i915_vtbl.c | 1 - src/mesa/drivers/dri/i915/intel_state.c | 1 - 2 files changed, 2 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 80bd249fa7b..c41cd37bcc2 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -31,7 +31,6 @@ #include "main/mtypes.h" #include "main/imports.h" #include "main/macros.h" -#include "main/colormac.h" #include "main/renderbuffer.h" #include "main/framebuffer.h" diff --git a/src/mesa/drivers/dri/i915/intel_state.c b/src/mesa/drivers/dri/i915/intel_state.c index c951ff731b8..3de9d50a4d7 100644 --- a/src/mesa/drivers/dri/i915/intel_state.c +++ b/src/mesa/drivers/dri/i915/intel_state.c @@ -30,7 +30,6 @@ #include "main/context.h" #include "main/macros.h" #include "main/enums.h" -#include "main/colormac.h" #include "main/dd.h" #include "intel_screen.h" -- cgit v1.2.3 From eb6b80842ffc27f138cc0221c5465c51d0105c04 Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Fri, 10 Apr 2015 13:41:17 -0700 Subject: i965: remove unneeded #include of colormac.h Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/intel_blit.c | 1 - src/mesa/drivers/dri/i965/intel_state.c | 1 - 2 files changed, 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 46fccc8d6ce..bd204aa3ce8 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -27,7 +27,6 @@ #include "main/blit.h" #include "main/context.h" #include "main/enums.h" -#include "main/colormac.h" #include "main/fbobject.h" 
#include "brw_context.h" diff --git a/src/mesa/drivers/dri/i965/intel_state.c b/src/mesa/drivers/dri/i965/intel_state.c index 498cab49ec4..2f5c901fdf8 100644 --- a/src/mesa/drivers/dri/i965/intel_state.c +++ b/src/mesa/drivers/dri/i965/intel_state.c @@ -27,7 +27,6 @@ #include "main/context.h" #include "main/macros.h" #include "main/enums.h" -#include "main/colormac.h" #include "main/dd.h" #include "intel_screen.h" -- cgit v1.2.3 From 3475b68abd8791324f201a141ba5263fcb91e2d0 Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Fri, 10 Apr 2015 13:41:18 -0700 Subject: radeon/r200: remove unneeded #include of colormac.h Reviewed-by: Matt Turner --- src/mesa/drivers/dri/r200/r200_context.h | 1 - src/mesa/drivers/dri/r200/r200_maos_arrays.c | 1 - src/mesa/drivers/dri/r200/r200_state.c | 1 - src/mesa/drivers/dri/r200/r200_state_init.c | 1 - src/mesa/drivers/dri/r200/r200_swtcl.c | 1 - src/mesa/drivers/dri/r200/r200_tcl.c | 1 - src/mesa/drivers/dri/r200/r200_tex.c | 1 - src/mesa/drivers/dri/radeon/radeon_context.h | 1 - src/mesa/drivers/dri/radeon/radeon_swtcl.c | 1 - src/mesa/drivers/dri/radeon/radeon_tex.c | 1 - src/mesa/drivers/dri/radeon/radeon_texstate.c | 1 - 11 files changed, 11 deletions(-) diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h index c02a4f399ee..7c6f48008a1 100644 --- a/src/mesa/drivers/dri/r200/r200_context.h +++ b/src/mesa/drivers/dri/r200/r200_context.h @@ -42,7 +42,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/macros.h" #include "main/mtypes.h" -#include "main/colormac.h" #include "r200_reg.h" #include "r200_vertprog.h" diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c index 3cfc03d10cc..9b16cf84cf5 100644 --- a/src/mesa/drivers/dri/r200/r200_maos_arrays.c +++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c @@ -34,7 +34,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/glheader.h" #include "main/mtypes.h" -#include "main/colormac.h" #include "main/imports.h" #include "main/macros.h" diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index 3038c634aff..b4acf985ee8 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -37,7 +37,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/imports.h" #include "main/api_arrayelt.h" #include "main/enums.h" -#include "main/colormac.h" #include "main/light.h" #include "main/framebuffer.h" #include "main/fbobject.h" diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c index ad64f788b9f..8cffa92c10b 100644 --- a/src/mesa/drivers/dri/r200/r200_state_init.c +++ b/src/mesa/drivers/dri/r200/r200_state_init.c @@ -34,7 +34,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/glheader.h" #include "main/imports.h" #include "main/enums.h" -#include "main/colormac.h" #include "main/api_arrayelt.h" #include "swrast/swrast.h" diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c index bb9be210567..72f09ae4056 100644 --- a/src/mesa/drivers/dri/r200/r200_swtcl.c +++ b/src/mesa/drivers/dri/r200/r200_swtcl.c @@ -34,7 +34,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/glheader.h" #include "main/mtypes.h" -#include "main/colormac.h" #include "main/enums.h" #include "main/image.h" #include "main/imports.h" diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index 747275334b6..c042aae0ef3 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -36,7 +36,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/imports.h" #include "main/mtypes.h" #include "main/enums.h" -#include "main/colormac.h" #include "main/light.h" #include "main/state.h" diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index feee0b2ba3f..ca921100c12 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -33,7 +33,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/glheader.h" #include "main/imports.h" -#include "main/colormac.h" #include "main/context.h" #include "main/enums.h" #include "main/image.h" diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h index badabd9508c..88a295386ca 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_context.h @@ -49,7 +49,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_drm.h" #include "main/macros.h" #include "main/mtypes.h" -#include "main/colormac.h" #include "radeon_screen.h" #include "radeon_common.h" diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index d7a02e90266..1e19cf7c7c0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -34,7 +34,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/glheader.h" #include "main/mtypes.h" -#include "main/colormac.h" #include "main/enums.h" #include "main/imports.h" #include "main/macros.h" diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 0955a135de8..d1aa1a18737 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -33,7 +33,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/glheader.h" #include "main/imports.h" -#include "main/colormac.h" #include "main/context.h" #include "main/enums.h" #include "main/image.h" diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index ec835f248eb..35b1538d9e1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -35,7 +35,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/glheader.h" #include "main/imports.h" -#include "main/colormac.h" #include "main/context.h" #include "main/macros.h" #include "main/teximage.h" -- cgit v1.2.3 From 38610102137d546afc3f543604414f061f1aec88 Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Fri, 10 Apr 2015 13:41:19 -0700 Subject: mesa: remove unneeded #include of colormac.h Reviewed-by: Matt Turner --- src/mesa/main/pack.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index 7147fd6e4fe..ef89e751a4c 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -43,7 +43,6 @@ #include "glheader.h" -#include "colormac.h" #include "enums.h" #include "image.h" #include "imports.h" -- cgit v1.2.3 From 922e0680f9152aaacde12636d91b97346b05d61c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 6 Oct 2015 16:12:50 -0700 Subject: vc4: Fix a memory leak in the simulator case. We validate per draw call, and need to free the shader per draw call, too. 
--- src/gallium/drivers/vc4/vc4_simulator.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 7cfd236349d..76980ca32af 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -106,10 +106,15 @@ vc4_simulator_unpin_bos(struct vc4_exec_info *exec) { for (int i = 0; i < exec->bo_count; i++) { struct drm_gem_cma_object *obj = exec->bo[i]; - struct vc4_bo *bo = to_vc4_bo(&obj->base)->bo; + struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base); + struct vc4_bo *bo = drm_bo->bo; memcpy(bo->map, obj->vaddr, bo->size); + if (drm_bo->validated_shader) { + free(drm_bo->validated_shader->texture_samples); + free(drm_bo->validated_shader); + } free(obj); } -- cgit v1.2.3 From b6cd39fc4761a3a6276a373e68510fb7fb735a77 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 6 Oct 2015 16:32:03 -0700 Subject: vc4: Fix a leak of the last color read/write surface on context destroy. --- src/gallium/drivers/vc4/vc4_context.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index 87d781d088d..d490fad5167 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -188,6 +188,9 @@ vc4_context_destroy(struct pipe_context *pctx) pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL); pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL); + pipe_surface_reference(&vc4->color_write, NULL); + pipe_surface_reference(&vc4->color_read, NULL); + vc4_program_fini(pctx); ralloc_free(vc4); -- cgit v1.2.3 From 64d9d4b73031d1bc23ae869e2afad86ed01edea5 Mon Sep 17 00:00:00 2001 From: Boyan Ding Date: Sun, 27 Sep 2015 17:16:57 +0800 Subject: vc4: use nir two-sided-color lowering Similar to 9ffc1049ca (freedreno/ir3: use nir two-sided-color lowering). No piglit regression. 
Signed-off-by: Boyan Ding Reviewed-by: Eric Anholt --- src/gallium/drivers/vc4/vc4_context.h | 1 - src/gallium/drivers/vc4/vc4_program.c | 25 ++----------------------- 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 7502293180a..c7698422951 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -103,7 +103,6 @@ struct vc4_uncompiled_shader { /** How many variants of this program were compiled, for shader-db. */ uint32_t compiled_variant_count; struct pipe_shader_state base; - const struct tgsi_token *twoside_tokens; }; struct vc4_ubo_range { diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 01ea7544984..31c7e28ff57 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1738,27 +1738,6 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, } const struct tgsi_token *tokens = key->shader_state->base.tokens; - if (c->fs_key && c->fs_key->light_twoside) { - if (!key->shader_state->twoside_tokens) { - const struct tgsi_lowering_config lowering_config = { - .color_two_side = true, - }; - struct tgsi_shader_info info; - key->shader_state->twoside_tokens = - tgsi_transform_lowering(&lowering_config, - key->shader_state->base.tokens, - &info); - - /* If no transformation occurred, then NULL is - * returned and we just use our original tokens. 
- */ - if (!key->shader_state->twoside_tokens) { - key->shader_state->twoside_tokens = - key->shader_state->base.tokens; - } - } - tokens = key->shader_state->twoside_tokens; - } if (vc4_debug & VC4_DEBUG_TGSI) { fprintf(stderr, "%s prog %d/%d TGSI:\n", @@ -1772,6 +1751,8 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, nir_convert_to_ssa(c->s); if (stage == QSTAGE_FRAG) vc4_nir_lower_blend(c); + if (c->fs_key && c->fs_key->light_twoside) + nir_lower_two_sided_color(c->s); vc4_nir_lower_io(c); nir_lower_idiv(c->s); nir_lower_load_const_to_scalar(c->s); @@ -2190,8 +2171,6 @@ vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso) hash_table_foreach(vc4->vs_cache, entry) delete_from_cache_if_matches(vc4->vs_cache, entry, so); - if (so->twoside_tokens != so->base.tokens) - free((void *)so->twoside_tokens); free((void *)so->base.tokens); free(so); } -- cgit v1.2.3 From 47d11990b2ca3eb666b8ac81fee7f7eb5019eba1 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 5 Oct 2015 15:19:05 -0400 Subject: nouveau: make sure there's always room to emit a fence I started seeing a lot of situations on nv30 where fence emission wouldn't fit into the previous buffer (causing assertions). This ensures that whenever checking for space, we always leave a bit of extra room for the fence emission commands. Adjusts the nv30 and nvc0 fence emission logic to bypass the space checking as well. 
Signed-off-by: Ilia Mirkin Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nouveau_winsys.h | 2 ++ src/gallium/drivers/nouveau/nv30/nv30_screen.c | 4 +++- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 3 ++- 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 389a229eb78..a44fd3efcf7 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -24,6 +24,8 @@ PUSH_AVAIL(struct nouveau_pushbuf *push) static inline bool PUSH_SPACE(struct nouveau_pushbuf *push, uint32_t size) { + /* Provide a buffer so that fences always have room to be emitted */ + size += 8; if (PUSH_AVAIL(push) < size) return nouveau_pushbuf_space(push, size, 0, 0) == 0; return true; diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index 39267b354e3..335c163b661 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -347,7 +347,9 @@ nv30_screen_fence_emit(struct pipe_screen *pscreen, uint32_t *sequence) *sequence = ++screen->base.fence.sequence; - BEGIN_NV04(push, NV30_3D(FENCE_OFFSET), 2); + assert(PUSH_AVAIL(push) >= 3); + PUSH_DATA (push, NV30_3D_FENCE_OFFSET | + (2 /* size */ << 18) | (7 /* subchan */ << 13)); PUSH_DATA (push, 0); PUSH_DATA (push, *sequence); } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 6012ff6fcb7..812b246ea0e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -388,6 +388,7 @@ nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence) /* we need to do it after possible flush in MARK_RING */ *sequence = ++screen->base.fence.sequence; + 
assert(PUSH_AVAIL(push) >= 5); PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4)); PUSH_DATAh(push, screen->fence.bo->offset); PUSH_DATA (push, screen->fence.bo->offset); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 32da76c88f6..afd91e6feee 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -537,7 +537,8 @@ nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence) /* we need to do it after possible flush in MARK_RING */ *sequence = ++screen->base.fence.sequence; - BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4); + assert(PUSH_AVAIL(push) >= 5); + PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(NVC0_3D(QUERY_ADDRESS_HIGH), 4)); PUSH_DATAh(push, screen->fence.bo->offset); PUSH_DATA (push, screen->fence.bo->offset); PUSH_DATA (push, *sequence); -- cgit v1.2.3 From f988eff37991272b3e685112136a8b2ae06386bf Mon Sep 17 00:00:00 2001 From: Varad Gautam Date: Wed, 7 Oct 2015 09:48:14 +0530 Subject: egl: restore surface type before linking config to its display commit c2c2e9a (egl: implement EGL_KHR_gl_colorspace (v2)) leaves _EGLConfig->SurfaceType set incorrectly before calling _eglLinkConfig(), and the bad value is passed around to platform_android. set it to zero as earlier. 
v2: Set SurfaceType to 0, rather than surface_type (Suggested by Emil) Cc: mesa-stable@lists.freedesktop.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91596 Signed-off-by: Varad Gautam Reviewed-by: Emil Velikov --- src/egl/drivers/dri2/egl_dri2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 53f21a8eab1..aff21814be5 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -313,6 +313,8 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, else conf->dri_single_config = dri_config; } + + conf->base.SurfaceType = 0; conf->base.ConfigID = config_id; _eglLinkConfig(&conf->base); -- cgit v1.2.3 From deb1765ec626b6177f1bf7b2a24f10ed79cf6243 Mon Sep 17 00:00:00 2001 From: Varad Gautam Date: Wed, 7 Oct 2015 09:48:15 +0530 Subject: egl: move memcpy to bring conf->base operations together Signed-off-by: Varad Gautam Suggested-by: Emil Velikov Reviewed-by: Emil Velikov --- src/egl/drivers/dri2/egl_dri2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index aff21814be5..f600d1b606d 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -301,7 +301,6 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, if (conf == NULL) return NULL; - memcpy(&conf->base, &base, sizeof base); if (double_buffer) { if (srgb) conf->dri_srgb_double_config = dri_config; @@ -314,6 +313,7 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, conf->dri_single_config = dri_config; } + memcpy(&conf->base, &base, sizeof base); conf->base.SurfaceType = 0; conf->base.ConfigID = config_id; -- cgit v1.2.3 From ae6ff72f5a56d71887bd2c59128868f71e5e2e01 Mon Sep 17 00:00:00 2001 From: Matthew Waters Date: Mon, 14 Sep 2015 18:35:46 +0100 Subject: glapi: add function pointers for KHR_debug for gles v2 [Emil Velikov] 
- Rebase. - Correct version in gles11 dispatch_sanity. - Move the extension enable to a separate patch. Signed-off-by: Matthew Waters Signed-off-by: Emil Velikov --- src/mapi/glapi/gen/KHR_debug.xml | 73 +++++++++++++++++++++++++++++++++ src/mesa/main/tests/dispatch_sanity.cpp | 25 +++++++++++ 2 files changed, 98 insertions(+) diff --git a/src/mapi/glapi/gen/KHR_debug.xml b/src/mapi/glapi/gen/KHR_debug.xml index 77956d61e38..e4a5a5b01e6 100644 --- a/src/mapi/glapi/gen/KHR_debug.xml +++ b/src/mapi/glapi/gen/KHR_debug.xml @@ -145,6 +145,79 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index b19c6d74bc0..ac2d2332df8 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -2039,6 +2039,19 @@ const struct function gles11_functions_possible[] = { { "glUnmapBufferOES", 11, -1 }, { "glVertexPointer", 11, _gloffset_VertexPointer }, { "glViewport", 11, _gloffset_Viewport }, + + /* GL_KHR_debug */ + { "glPushDebugGroupKHR", 11, -1 }, + { "glPopDebugGroupKHR", 11, -1 }, + { "glDebugMessageCallbackKHR", 11, -1 }, + { "glDebugMessageControlKHR", 11, -1 }, + { "glDebugMessageInsertKHR", 11, -1 }, + { "glGetDebugMessageLogKHR", 11, -1 }, + { "glGetObjectLabelKHR", 11, -1 }, + { "glGetObjectPtrLabelKHR", 11, -1 }, + { "glObjectLabelKHR", 11, -1 }, + { "glObjectPtrLabelKHR", 11, -1 }, + { NULL, 0, -1 } }; @@ -2262,6 +2275,18 @@ const struct function gles2_functions_possible[] = { { "glEndPerfQueryINTEL", 20, -1 }, { "glGetPerfQueryDataINTEL", 20, -1 }, + /* GL_KHR_debug */ + { "glPushDebugGroupKHR", 20, -1 }, + { "glPopDebugGroupKHR", 20, -1 }, + { "glDebugMessageCallbackKHR", 20, -1 }, + { "glDebugMessageControlKHR", 20, -1 }, + { "glDebugMessageInsertKHR", 20, -1 }, + { "glGetDebugMessageLogKHR", 20, -1 }, + { "glGetObjectLabelKHR", 20, -1 
}, + { "glGetObjectPtrLabelKHR", 20, -1 }, + { "glObjectLabelKHR", 20, -1 }, + { "glObjectPtrLabelKHR", 20, -1 }, + { NULL, 0, -1 } }; -- cgit v1.2.3 From 70643a1389179c214b454a6a49e9f47a768ba904 Mon Sep 17 00:00:00 2001 From: Matthew Waters Date: Wed, 16 Sep 2015 16:38:27 +0100 Subject: main/get: make KHR_debug enums available everywhere Move all the enums but CONTEXT_FLAGS. The spec seems quite explicit about the latter (wrt OpenGL ES) "In OpenGL ES versions prior to and including ES 3.1 there is no CONTEXT_FLAGS state and therefore the CONTEXT_FLAG_DEBUG_BIT cannot be queried." v2 [Emil Velikov] Rebase. v3 [Emil Velikov] Drop the CONTEXT_FLAGS hunk - not applicable for GLES Signed-off-by: Matthew Waters Signed-off-by: Emil Velikov --- src/mesa/main/get_hash_params.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 38b08b02a65..c295615b475 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -124,6 +124,15 @@ descriptor=[ # GL_EXT_texture_filter_anisotropic [ "MAX_TEXTURE_MAX_ANISOTROPY_EXT", "CONTEXT_FLOAT(Const.MaxTextureMaxAnisotropy), extra_EXT_texture_filter_anisotropic" ], + +# GL_KHR_debug (GL 4.3)/ GL_ARB_debug_output + [ "DEBUG_LOGGED_MESSAGES", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], + [ "DEBUG_NEXT_LOGGED_MESSAGE_LENGTH", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], + [ "MAX_DEBUG_LOGGED_MESSAGES", "CONST(MAX_DEBUG_LOGGED_MESSAGES), NO_EXTRA" ], + [ "MAX_DEBUG_MESSAGE_LENGTH", "CONST(MAX_DEBUG_MESSAGE_LENGTH), NO_EXTRA" ], + [ "MAX_LABEL_LENGTH", "CONST(MAX_LABEL_LENGTH), NO_EXTRA" ], + [ "MAX_DEBUG_GROUP_STACK_DEPTH", "CONST(MAX_DEBUG_GROUP_STACK_DEPTH), NO_EXTRA" ], + [ "DEBUG_GROUP_STACK_DEPTH", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], ]}, # Enums in OpenGL and GLES1 @@ -791,15 +800,6 @@ descriptor=[ # GL_ARB_robustness [ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ], -#
GL_KHR_debug (GL 4.3)/ GL_ARB_debug_output - [ "DEBUG_LOGGED_MESSAGES", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], - [ "DEBUG_NEXT_LOGGED_MESSAGE_LENGTH", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], - [ "MAX_DEBUG_LOGGED_MESSAGES", "CONST(MAX_DEBUG_LOGGED_MESSAGES), NO_EXTRA" ], - [ "MAX_DEBUG_MESSAGE_LENGTH", "CONST(MAX_DEBUG_MESSAGE_LENGTH), NO_EXTRA" ], - [ "MAX_LABEL_LENGTH", "CONST(MAX_LABEL_LENGTH), NO_EXTRA" ], - [ "MAX_DEBUG_GROUP_STACK_DEPTH", "CONST(MAX_DEBUG_GROUP_STACK_DEPTH), NO_EXTRA" ], - [ "DEBUG_GROUP_STACK_DEPTH", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], - [ "MAX_DUAL_SOURCE_DRAW_BUFFERS", "CONTEXT_INT(Const.MaxDualSourceDrawBuffers), extra_ARB_blend_func_extended" ], # GL_ARB_uniform_buffer_object -- cgit v1.2.3 From b69cfbdf18fa64606a76761b20bc268f4ac731e5 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 14 Sep 2015 18:35:48 +0100 Subject: mesa: enable KHR_debug for ES contexts Signed-off-by: Emil Velikov --- docs/relnotes/11.1.0.html | 3 ++- src/mesa/main/extensions.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html index c755c98f6eb..88c34dd808d 100644 --- a/docs/relnotes/11.1.0.html +++ b/docs/relnotes/11.1.0.html @@ -45,11 +45,12 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_blend_func_extended on freedreno (a3xx)
  • +
  • GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips
  • GL_ARB_shader_storage_buffer_object on i965
  • GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi
  • GL_ARB_texture_barrier / GL_NV_texture_barrier on i965
  • GL_ARB_texture_query_lod on softpipe
  • -
  • GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips
  • +
  • GL_KHR_debug (GLES)

Bug fixes

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index b2c88c37366..281c640901e 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -342,7 +342,7 @@ static const struct extension extension_table[] = { { "GL_OES_vertex_array_object", o(dummy_true), ES1 | ES2, 2010 }, /* KHR extensions */ - { "GL_KHR_debug", o(dummy_true), GL, 2012 }, + { "GL_KHR_debug", o(dummy_true), GL | ES1 | ES2, 2012 }, { "GL_KHR_context_flush_control", o(dummy_true), GL | ES2, 2014 }, { "GL_KHR_texture_compression_astc_hdr", o(KHR_texture_compression_astc_hdr), GL | ES2, 2012 }, { "GL_KHR_texture_compression_astc_ldr", o(KHR_texture_compression_astc_ldr), GL | ES2, 2012 }, -- cgit v1.2.3 From 858f2f2ae6d72f338fdd6d544b0c733814e22724 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sun, 13 Sep 2015 12:25:27 +0100 Subject: egl/dri2: ease srgb __DRIconfig conditionals One can simplify the if-else chain, by declaring the driconfigs as a two sized array, whilst using srgb as an index to the correct entry. Signed-off-by: Emil Velikov Acked-by: Alex Deucher --- src/egl/drivers/dri2/egl_dri2.c | 37 ++++++++++++------------------------- src/egl/drivers/dri2/egl_dri2.h | 6 ++---- 2 files changed, 14 insertions(+), 29 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index f600d1b606d..229285fbbe6 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -131,12 +131,10 @@ const __DRIconfig * dri2_get_dri_config(struct dri2_egl_config *conf, EGLint surface_type, EGLenum colorspace) { - if (colorspace == EGL_GL_COLORSPACE_SRGB_KHR) - return surface_type == EGL_WINDOW_BIT ? conf->dri_srgb_double_config : - conf->dri_srgb_single_config; - else - return surface_type == EGL_WINDOW_BIT ? conf->dri_double_config : - conf->dri_single_config; + const bool srgb = colorspace == EGL_GL_COLORSPACE_SRGB_KHR; + + return surface_type == EGL_WINDOW_BIT ?
conf->dri_double_config[srgb] : + conf->dri_single_config[srgb]; } static EGLBoolean @@ -284,14 +282,10 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, if (num_configs == 1) { conf = (struct dri2_egl_config *) matching_config; - if (double_buffer && srgb && !conf->dri_srgb_double_config) - conf->dri_srgb_double_config = dri_config; - else if (double_buffer && !srgb && !conf->dri_double_config) - conf->dri_double_config = dri_config; - else if (!double_buffer && srgb && !conf->dri_srgb_single_config) - conf->dri_srgb_single_config = dri_config; - else if (!double_buffer && !srgb && !conf->dri_single_config) - conf->dri_single_config = dri_config; + if (double_buffer && !conf->dri_double_config[srgb]) + conf->dri_double_config[srgb] = dri_config; + else if (!double_buffer && !conf->dri_single_config[srgb]) + conf->dri_single_config[srgb] = dri_config; else /* a similar config type is already added (unlikely) => discard */ return NULL; @@ -301,17 +295,10 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, if (conf == NULL) return NULL; - if (double_buffer) { - if (srgb) - conf->dri_srgb_double_config = dri_config; - else - conf->dri_double_config = dri_config; - } else { - if (srgb) - conf->dri_srgb_single_config = dri_config; - else - conf->dri_single_config = dri_config; - } + if (double_buffer) + conf->dri_double_config[srgb] = dri_config; + else + conf->dri_single_config[srgb] = dri_config; memcpy(&conf->base, &base, sizeof base); conf->base.SurfaceType = 0; diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index 9aa2a8c1003..0e837b3eb8b 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -284,10 +284,8 @@ struct dri2_egl_surface struct dri2_egl_config { _EGLConfig base; - const __DRIconfig *dri_single_config; - const __DRIconfig *dri_double_config; - const __DRIconfig *dri_srgb_single_config; - const __DRIconfig *dri_srgb_double_config; + const 
__DRIconfig *dri_single_config[2]; + const __DRIconfig *dri_double_config[2]; }; struct dri2_egl_image -- cgit v1.2.3 From 4ea5ed9f51c2ec851ac2d81108035bf7046bbc69 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sun, 13 Sep 2015 12:36:54 +0100 Subject: egl/dri2: enable EGL_KHR_gl_colorspace for swrast No driver changes needed for softpipe/llvmpipe - things just work. v2: Whitespace fixes. Signed-off-by: Emil Velikov Reviewed-by: Boyan Ding Acked-by: Alex Deucher --- docs/relnotes/11.1.0.html | 1 + src/egl/drivers/dri2/platform_drm.c | 14 +++++++------- src/egl/drivers/dri2/platform_wayland.c | 9 ++++++--- src/egl/drivers/dri2/platform_x11.c | 12 ++++++------ 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html index 88c34dd808d..543ddc7958b 100644 --- a/docs/relnotes/11.1.0.html +++ b/docs/relnotes/11.1.0.html @@ -51,6 +51,7 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_texture_barrier / GL_NV_texture_barrier on i965
  • GL_ARB_texture_query_lod on softpipe
  • GL_KHR_debug (GLES)
  • +
  • EGL_KHR_gl_colorspace on softpipe, llvmpipe
  • Bug fixes

    diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c index 050c309dceb..815d2674cb2 100644 --- a/src/egl/drivers/dri2/platform_drm.c +++ b/src/egl/drivers/dri2/platform_drm.c @@ -101,6 +101,7 @@ dri2_drm_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, struct dri2_egl_surface *dri2_surf; struct gbm_surface *window = native_window; struct gbm_dri_surface *surf; + const __DRIconfig *config; (void) drv; @@ -130,21 +131,20 @@ dri2_drm_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, goto cleanup_surf; } - if (dri2_dpy->dri2) { - const __DRIconfig *config = - dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT, - dri2_surf->base.GLColorspace); + config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT, + dri2_surf->base.GLColorspace); + if (dri2_dpy->dri2) { dri2_surf->dri_drawable = (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config, dri2_surf->gbm_surf); } else { assert(dri2_dpy->swrast != NULL); + dri2_surf->dri_drawable = - (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen, - dri2_conf->dri_double_config, - dri2_surf->gbm_surf); + (*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen, config, + dri2_surf->gbm_surf); } if (dri2_surf->dri_drawable == NULL) { diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index 6cf5461d52c..0d161f617a1 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -1645,6 +1645,7 @@ dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, struct dri2_egl_config *dri2_conf = dri2_egl_config(conf); struct wl_egl_window *window = native_window; struct dri2_egl_surface *dri2_surf; + const __DRIconfig *config; (void) drv; @@ -1669,10 +1670,12 @@ dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, dri2_surf->base.Width = -1; dri2_surf->base.Height = -1; + config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT, + 
dri2_surf->base.GLColorspace); + dri2_surf->dri_drawable = - (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen, - dri2_conf->dri_double_config, - dri2_surf); + (*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen, + config, dri2_surf); if (dri2_surf->dri_drawable == NULL) { _eglError(EGL_BAD_ALLOC, "swrast->createNewDrawable"); goto cleanup_dri_drawable; diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c index 7991fc2b67b..88a06a8c6a8 100644 --- a/src/egl/drivers/dri2/platform_x11.c +++ b/src/egl/drivers/dri2/platform_x11.c @@ -206,6 +206,7 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, xcb_generic_error_t *error; xcb_drawable_t drawable; xcb_screen_t *screen; + const __DRIconfig *config; STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_surface)); drawable = (uintptr_t) native_surface; @@ -245,19 +246,18 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, dri2_surf->drawable = drawable; } - if (dri2_dpy->dri2) { - const __DRIconfig *config = - dri2_get_dri_config(dri2_conf, type, dri2_surf->base.GLColorspace); + config = dri2_get_dri_config(dri2_conf, type, + dri2_surf->base.GLColorspace); + if (dri2_dpy->dri2) { dri2_surf->dri_drawable = (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config, dri2_surf); } else { assert(dri2_dpy->swrast); dri2_surf->dri_drawable = - (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen, - dri2_conf->dri_double_config, - dri2_surf); + (*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen, config, + dri2_surf); } if (dri2_surf->dri_drawable == NULL) { -- cgit v1.2.3 From 2bad030ac9c47c316f615db83c52cf0391f64f3f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 6 Oct 2015 16:55:39 -0600 Subject: svga: round UBO constant buffer size up/down to multiple of 16 bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The svga3d device requires constant buffers to be a 
multiple of 16 bytes in size. OpenGL UBOs may not fit that restriction. As a work-around, round the size up if possible, else round down. Note that this patch only affects UBO constant buffers (index 1 or higher), not the 0th/default constant buffer. Fixes the game Grim Fandango Remastered. VMware bug 1510130. Reviewed-by: Charmaine Lee Reviewed-by: José Fonseca --- src/gallium/drivers/svga/svga_state_constants.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c index b6d6de0dca3..75592d3bf8b 100644 --- a/src/gallium/drivers/svga/svga_state_constants.c +++ b/src/gallium/drivers/svga/svga_state_constants.c @@ -704,6 +704,24 @@ emit_consts_vgpu10(struct svga_context *svga, unsigned shader) assert(size == 0); } + if (size % 16 != 0) { + /* GL's buffer range sizes can be any number of bytes but the + * SVGA3D device requires a multiple of 16 bytes. + */ + const unsigned total_size = buffer->b.b.width0; + + if (offset + align(size, 16) <= total_size) { + /* round up size to multiple of 16 */ + size = align(size, 16); + } + else { + /* round down to multiple of 16 (this may cause rendering problems + * but should avoid a device error). + */ + size &= ~16; + } + } + assert(size % 16 == 0); ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc, index, -- cgit v1.2.3 From a2bc4a7b04d6971e093b0d25caf04de11ee07045 Mon Sep 17 00:00:00 2001 From: Stefan Dösinger Date: Tue, 6 Oct 2015 16:55:39 -0600 Subject: mesa: Remove GL_ARB_sampler_object depth compare error checking. Version 3: Simplify the code comment, word wrap commit description. Version 2: Return GL_FALSE if ARB_shadow is unsupported instead of pretending to store the value as suggested by Brian Paul. This fixes a GL error warning on r200 in Wine. 
The GL_ARB_sampler_objects extension does not specify a dependency on GL_ARB_shadow or GL_ARB_depth_texture for setting the depth texture compare mode and function. Silently ignore attempts to change these settings. They won't matter without a depth texture being assigned anyway. Reviewed-by: Brian Paul --- src/mesa/main/samplerobj.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c index 9bcba60fd6f..676dd367b3f 100644 --- a/src/mesa/main/samplerobj.c +++ b/src/mesa/main/samplerobj.c @@ -621,8 +621,12 @@ static GLuint set_sampler_compare_mode(struct gl_context *ctx, struct gl_sampler_object *samp, GLint param) { + /* If GL_ARB_shadow is not supported, don't report an error. The + * sampler object extension spec isn't clear on this extension interaction. + * Silences errors with Wine on older GPUs such as R200. + */ if (!ctx->Extensions.ARB_shadow) - return INVALID_PNAME; + return GL_FALSE; if (samp->CompareMode == param) return GL_FALSE; @@ -642,8 +646,12 @@ static GLuint set_sampler_compare_func(struct gl_context *ctx, struct gl_sampler_object *samp, GLint param) { + /* If GL_ARB_shadow is not supported, don't report an error. The + * sampler object extension spec isn't clear on this extension interaction. + * Silences errors with Wine on older GPUs such as R200. 
+ */ if (!ctx->Extensions.ARB_shadow) - return INVALID_PNAME; + return GL_FALSE; if (samp->CompareFunc == param) return GL_FALSE; @@ -1329,13 +1337,9 @@ _mesa_GetSamplerParameteriv(GLuint sampler, GLenum pname, GLint *params) *params = IROUND(sampObj->LodBias); break; case GL_TEXTURE_COMPARE_MODE: - if (!ctx->Extensions.ARB_shadow) - goto invalid_pname; *params = sampObj->CompareMode; break; case GL_TEXTURE_COMPARE_FUNC: - if (!ctx->Extensions.ARB_shadow) - goto invalid_pname; *params = sampObj->CompareFunc; break; case GL_TEXTURE_MAX_ANISOTROPY_EXT: @@ -1418,13 +1422,9 @@ _mesa_GetSamplerParameterfv(GLuint sampler, GLenum pname, GLfloat *params) *params = sampObj->LodBias; break; case GL_TEXTURE_COMPARE_MODE: - if (!ctx->Extensions.ARB_shadow) - goto invalid_pname; *params = (GLfloat) sampObj->CompareMode; break; case GL_TEXTURE_COMPARE_FUNC: - if (!ctx->Extensions.ARB_shadow) - goto invalid_pname; *params = (GLfloat) sampObj->CompareFunc; break; case GL_TEXTURE_MAX_ANISOTROPY_EXT: @@ -1497,13 +1497,9 @@ _mesa_GetSamplerParameterIiv(GLuint sampler, GLenum pname, GLint *params) *params = (GLint) sampObj->LodBias; break; case GL_TEXTURE_COMPARE_MODE: - if (!ctx->Extensions.ARB_shadow) - goto invalid_pname; *params = sampObj->CompareMode; break; case GL_TEXTURE_COMPARE_FUNC: - if (!ctx->Extensions.ARB_shadow) - goto invalid_pname; *params = sampObj->CompareFunc; break; case GL_TEXTURE_MAX_ANISOTROPY_EXT: @@ -1576,13 +1572,9 @@ _mesa_GetSamplerParameterIuiv(GLuint sampler, GLenum pname, GLuint *params) *params = (GLuint) sampObj->LodBias; break; case GL_TEXTURE_COMPARE_MODE: - if (!ctx->Extensions.ARB_shadow) - goto invalid_pname; *params = sampObj->CompareMode; break; case GL_TEXTURE_COMPARE_FUNC: - if (!ctx->Extensions.ARB_shadow) - goto invalid_pname; *params = sampObj->CompareFunc; break; case GL_TEXTURE_MAX_ANISOTROPY_EXT: -- cgit v1.2.3 From 70c4cde453bc12be5262b88dcb26e97dcb8e0507 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 6 Oct 2015 16:55:39 -0600 
Subject: svga: whitespace fixes in svga_resource_buffer.c --- src/gallium/drivers/svga/svga_resource_buffer.c | 53 +++++++++++++------------ 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c index 7ef36b367d3..57e37fcfe14 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/src/gallium/drivers/svga/svga_resource_buffer.c @@ -243,7 +243,7 @@ svga_buffer_transfer_map(struct pipe_context *pipe, } else { FREE(transfer); } - + return map; } @@ -275,9 +275,9 @@ svga_buffer_transfer_unmap( struct pipe_context *pipe, struct svga_screen *ss = svga_screen(pipe->screen); struct svga_context *svga = svga_context(pipe); struct svga_buffer *sbuf = svga_buffer(transfer->resource); - + pipe_mutex_lock(ss->swc_mutex); - + assert(sbuf->map.count); if (sbuf->map.count) { --sbuf->map.count; @@ -296,7 +296,7 @@ svga_buffer_transfer_unmap( struct pipe_context *pipe, */ SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n"); - + sbuf->dma.flags.discard = TRUE; svga_buffer_add_range(sbuf, 0, sbuf->b.b.width0); @@ -316,28 +316,28 @@ svga_buffer_destroy( struct pipe_screen *screen, struct svga_buffer *sbuf = svga_buffer( buf ); assert(!p_atomic_read(&buf->reference.count)); - + assert(!sbuf->dma.pending); - if(sbuf->handle) + if (sbuf->handle) svga_buffer_destroy_host_surface(ss, sbuf); - - if(sbuf->uploaded.buffer) + + if (sbuf->uploaded.buffer) pipe_resource_reference(&sbuf->uploaded.buffer, NULL); - if(sbuf->hwbuf) + if (sbuf->hwbuf) svga_buffer_destroy_hw_storage(ss, sbuf); - - if(sbuf->swbuf && !sbuf->user) + + if (sbuf->swbuf && !sbuf->user) align_free(sbuf->swbuf); - + ss->total_resource_bytes -= sbuf->size; FREE(sbuf); } -struct u_resource_vtbl svga_buffer_vtbl = +struct u_resource_vtbl svga_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ svga_buffer_destroy, /* resource_destroy */ @@ -355,11 +355,11 @@ svga_buffer_create(struct pipe_screen 
*screen, { struct svga_screen *ss = svga_screen(screen); struct svga_buffer *sbuf; - + sbuf = CALLOC_STRUCT(svga_buffer); - if(!sbuf) + if (!sbuf) goto error1; - + sbuf->b.b = *template; sbuf->b.vtbl = &svga_buffer_vtbl; pipe_reference_init(&sbuf->b.b.reference, 1); @@ -378,7 +378,7 @@ svga_buffer_create(struct pipe_screen *screen, } } - if(svga_buffer_needs_hw_storage(template->bind)) { + if (svga_buffer_needs_hw_storage(template->bind)) { /* If the buffer will be used for vertex/index/stream data, set all * the flags so that the buffer will be accepted for all those uses. @@ -396,22 +396,22 @@ svga_buffer_create(struct pipe_screen *screen, sbuf->bind_flags |= PIPE_BIND_STREAM_OUTPUT; } - if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK) + if (svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK) goto error2; } else { sbuf->swbuf = align_malloc(sbuf->b.b.width0, 64); - if(!sbuf->swbuf) + if (!sbuf->swbuf) goto error2; } - + debug_reference(&sbuf->b.b.reference, (debug_reference_descriptor)debug_describe_resource, 0); sbuf->size = util_resource_size(&sbuf->b.b); ss->total_resource_bytes += sbuf->size; - return &sbuf->b.b; + return &sbuf->b.b; error2: FREE(sbuf); @@ -419,6 +419,7 @@ error1: return NULL; } + struct pipe_resource * svga_user_buffer_create(struct pipe_screen *screen, void *ptr, @@ -426,11 +427,11 @@ svga_user_buffer_create(struct pipe_screen *screen, unsigned bind) { struct svga_buffer *sbuf; - + sbuf = CALLOC_STRUCT(svga_buffer); - if(!sbuf) + if (!sbuf) goto no_sbuf; - + pipe_reference_init(&sbuf->b.b.reference, 1); sbuf->b.vtbl = &svga_buffer_vtbl; sbuf->b.b.screen = screen; @@ -448,8 +449,8 @@ svga_user_buffer_create(struct pipe_screen *screen, debug_reference(&sbuf->b.b.reference, (debug_reference_descriptor)debug_describe_resource, 0); - - return &sbuf->b.b; + + return &sbuf->b.b; no_sbuf: return NULL; -- cgit v1.2.3 From 6ed8fd3d6703bc51378e05a0e209df58d9844082 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 6 Oct 2015 16:55:39 -0600 
Subject: svga: whitespace fixes in svga_sampler_view.c --- src/gallium/drivers/svga/svga_sampler_view.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/svga/svga_sampler_view.c b/src/gallium/drivers/svga/svga_sampler_view.c index ffa5bce80c0..9c33a79db0f 100644 --- a/src/gallium/drivers/svga/svga_sampler_view.c +++ b/src/gallium/drivers/svga/svga_sampler_view.c @@ -48,9 +48,11 @@ svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv) { char res[128]; debug_describe_resource(res, sv->texture); - util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>", res, sv->min_lod, sv->max_lod); + util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>", + res, sv->min_lod, sv->max_lod); } + struct svga_sampler_view * svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_resource *pt, @@ -58,10 +60,11 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); struct svga_screen *ss = svga_screen(pipe->screen); - struct svga_texture *tex = svga_texture(pt); + struct svga_texture *tex = svga_texture(pt); struct svga_sampler_view *sv = NULL; SVGA3dSurfaceFlags flags = SVGA3D_SURFACE_HINT_TEXTURE; - SVGA3dSurfaceFormat format = svga_translate_format(ss, pt->format, PIPE_BIND_SAMPLER_VIEW); + SVGA3dSurfaceFormat format = svga_translate_format(ss, pt->format, + PIPE_BIND_SAMPLER_VIEW); boolean view = TRUE; assert(pt); @@ -155,7 +158,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, sv->key.cachable = 0; sv->handle = tex->handle; debug_reference(&sv->reference, - (debug_reference_descriptor)svga_debug_describe_sampler_view, 0); + (debug_reference_descriptor) + svga_debug_describe_sampler_view, 0); return sv; } @@ -164,13 +168,16 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, pipe_mutex_unlock(ss->tex_mutex); debug_reference(&sv->reference, - (debug_reference_descriptor)svga_debug_describe_sampler_view, 0); + (debug_reference_descriptor) 
+ svga_debug_describe_sampler_view, 0); return sv; } + void -svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v) +svga_validate_sampler_view(struct svga_context *svga, + struct svga_sampler_view *v) { struct svga_texture *tex = svga_texture(v->texture); unsigned numFaces; @@ -186,7 +193,7 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view * age = tex->age; - if(tex->b.b.target == PIPE_TEXTURE_CUBE) + if (tex->b.b.target == PIPE_TEXTURE_CUBE) numFaces = 6; else numFaces = 1; @@ -207,12 +214,13 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view * v->age = age; } + void svga_destroy_sampler_view_priv(struct svga_sampler_view *v) { struct svga_texture *tex = svga_texture(v->texture); - if(v->handle != tex->handle) { + if (v->handle != tex->handle) { struct svga_screen *ss = svga_screen(v->texture->screen); SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle); svga_screen_surface_destroy(ss, &v->key, &v->handle); -- cgit v1.2.3 From 5749676d03d1a4964888a2d9a7624d3b96cc4886 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 6 Sep 2015 15:43:23 +0200 Subject: radeonsi: remove TC L2 cache flush for index buffers on VI Reviewed-by: Alex Deucher --- src/gallium/drivers/radeonsi/si_state_draw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 43170ec446b..5face423941 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -813,9 +813,9 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) } } - /* TODO: VI should read index buffers through TC, so this shouldn't be - * needed on VI. */ - if (info->indexed && r600_resource(ib.buffer)->TC_L2_dirty) { + /* VI reads index buffers through TC L2. 
*/ + if (info->indexed && sctx->b.chip_class <= CIK && + r600_resource(ib.buffer)->TC_L2_dirty) { sctx->b.flags |= SI_CONTEXT_INV_TC_L2; r600_resource(ib.buffer)->TC_L2_dirty = false; } -- cgit v1.2.3 From 13e69805ea6a3aa0ec80c2b2430ae4da05c6b1ae Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 2 Oct 2015 19:21:54 +0200 Subject: radeonsi: fix a GS hang on VI Broken by one of the cleanups: 0d46c3bc9d09b376d74f7399e1a2d1b0a923640b Not applicable to stable. Reviewed-by: Alex Deucher --- src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_state_shaders.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 3ff4b46251e..2abd5b5a0c3 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -192,6 +192,7 @@ struct si_context { /* Precomputed states. */ struct si_pm4_state *init_config; + bool init_config_has_vgt_flush; struct si_pm4_state *vgt_shader_config[4]; /* With rasterizer discard, there doesn't have to be a pixel shader. * In that case, we bind this one: */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 77c585f958e..f673388b121 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1136,6 +1136,20 @@ static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom sctx->force_persample_interp); } +/** + * Writing CONFIG or UCONFIG VGT registers requires VGT_FLUSH before that. 
+ */ +static void si_init_config_add_vgt_flush(struct si_context *sctx) +{ + if (sctx->init_config_has_vgt_flush) + return; + + si_pm4_cmd_begin(sctx->init_config, PKT3_EVENT_WRITE); + si_pm4_cmd_add(sctx->init_config, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); + si_pm4_cmd_end(sctx->init_config, false); + sctx->init_config_has_vgt_flush = true; +} + /* Initialize state related to ESGS / GSVS ring buffers */ static void si_init_gs_rings(struct si_context *sctx) { @@ -1156,6 +1170,8 @@ static void si_init_gs_rings(struct si_context *sctx) return; } + si_init_config_add_vgt_flush(sctx); + /* Append these registers to the init config state. */ if (sctx->b.chip_class >= CIK) { if (sctx->b.chip_class >= VI) { @@ -1402,6 +1418,8 @@ static void si_init_tess_factor_ring(struct si_context *sctx) assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0); + si_init_config_add_vgt_flush(sctx); + /* Append these registers to the init config state. */ if (sctx->b.chip_class >= CIK) { si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE, -- cgit v1.2.3 From 164c8277f0edaab64e3cea43a1cf943ace62b21b Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Wed, 7 Oct 2015 11:43:58 -0700 Subject: egl/dri2: Properly dereference array. 
Fixes a regression that broke EGL since commit 858f2f2ae6d72f338fdd6d544b0c733814e22724 Author: Emil Velikov Date: Sun Sep 13 12:25:27 2015 +0100 egl/dri2: ease srgb __DRIconfig conditionals --- src/egl/drivers/dri2/egl_dri2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 229285fbbe6..4cc5f231333 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -1010,10 +1010,10 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, * doubleBufferMode check in * src/mesa/main/context.c:check_compatible() */ - if (dri2_config->dri_double_config) - dri_config = dri2_config->dri_double_config; + if (dri2_config->dri_double_config[0]) + dri_config = dri2_config->dri_double_config[0]; else - dri_config = dri2_config->dri_single_config; + dri_config = dri2_config->dri_single_config[0]; /* EGL_WINDOW_BIT is set only when there is a dri_double_config. This * makes sure the back buffer will always be used. -- cgit v1.2.3 From bbf728f11b45121bf2d03b61bc0ffc69e0ff0836 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 7 Oct 2015 21:23:16 +0100 Subject: Revert "mesa: enable KHR_debug for ES contexts" This reverts commit b69cfbdf18fa64606a76761b20bc268f4ac731e5. This isn't quite baked yet. Seems that despite building the ES piglits, none of them got executed. --- docs/relnotes/11.1.0.html | 1 - src/mesa/main/extensions.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html index 543ddc7958b..4c8b9d8256e 100644 --- a/docs/relnotes/11.1.0.html +++ b/docs/relnotes/11.1.0.html @@ -50,7 +50,6 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi
  • GL_ARB_texture_barrier / GL_NV_texture_barrier on i965
  • GL_ARB_texture_query_lod on softpipe
  • -
  • GL_KHR_debug (GLES)
  • EGL_KHR_gl_colorspace on softpipe, llvmpipe
  • diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index 281c640901e..b2c88c37366 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -342,7 +342,7 @@ static const struct extension extension_table[] = { { "GL_OES_vertex_array_object", o(dummy_true), ES1 | ES2, 2010 }, /* KHR extensions */ - { "GL_KHR_debug", o(dummy_true), GL | ES1 | ES2, 2012 }, + { "GL_KHR_debug", o(dummy_true), GL, 2012 }, { "GL_KHR_context_flush_control", o(dummy_true), GL | ES2, 2014 }, { "GL_KHR_texture_compression_astc_hdr", o(KHR_texture_compression_astc_hdr), GL | ES2, 2012 }, { "GL_KHR_texture_compression_astc_ldr", o(KHR_texture_compression_astc_ldr), GL | ES2, 2012 }, -- cgit v1.2.3 From 6ad9ebb073fc4ed245ef8e9db4479a52e818cb92 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 7 Oct 2015 15:52:09 -0700 Subject: mesa: Correctly handle GL_BGRA_EXT in ES3 format_and_type checks The EXT_texture_format_BGRA8888 extension (which mesa supports unconditionally) adds a new format and internal format called GL_BGRA_EXT. Previously, this was not really handled at all in _mesa_es3_error_check_format_and_type. When the checks were tightened in commit f15a7f3c, we accidentally tightened things too far and GL_BGRA_EXT would always cause an error to be thrown. There were two primary issues here. First, is that _mesa_es3_effective_internal_format_for_format_and_type didn't handle the GL_BGRA_EXT format. Second is that it blindly uses _mesa_base_tex_format which returns GL_RGBA for GL_BGRA_EXT. This commit fixes both of these issues as well as adds explicit checks that GL_BGRA_EXT is only ever used with GL_BGRA_EXT and GL_UNSIGNED_BYTE. 
Signed-off-by: Jason Ekstrand Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92265 Reviewed-by: Ian Romanick Cc: "11.0" --- src/mesa/main/glformats.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c index 7dab33c0051..faa63825380 100644 --- a/src/mesa/main/glformats.c +++ b/src/mesa/main/glformats.c @@ -2678,6 +2678,7 @@ _mesa_es3_effective_internal_format_for_format_and_type(GLenum format, * internal formats, they do not correspond to GL constants, so the base * format is returned instead. */ + case GL_BGRA_EXT: case GL_LUMINANCE_ALPHA: case GL_LUMINANCE: case GL_ALPHA: @@ -2797,8 +2798,19 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx, if (effectiveInternalFormat == GL_NONE) return GL_INVALID_OPERATION; - GLenum baseInternalFormat = - _mesa_base_tex_format(ctx, effectiveInternalFormat); + GLenum baseInternalFormat; + if (internalFormat == GL_BGRA_EXT) { + /* Unfortunately, _mesa_base_tex_format returns a base format of + * GL_RGBA for GL_BGRA_EXT. This makes perfect sense if you're + * asking the question, "what channels does this format have?" + * However, if we're trying to determine if two internal formats + * match in the ES3 sense, we actually want GL_BGRA. 
+ */ + baseInternalFormat = GL_BGRA_EXT; + } else { + baseInternalFormat = + _mesa_base_tex_format(ctx, effectiveInternalFormat); + } if (internalFormat != baseInternalFormat) return GL_INVALID_OPERATION; @@ -2807,6 +2819,11 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx, } switch (format) { + case GL_BGRA_EXT: + if (type != GL_UNSIGNED_BYTE || internalFormat != GL_BGRA) + return GL_INVALID_OPERATION; + break; + case GL_RGBA: switch (type) { case GL_UNSIGNED_BYTE: -- cgit v1.2.3 From 4e7fd66cf0986a7eb58800f52d0b8709c2f997d6 Mon Sep 17 00:00:00 2001 From: Tapani Pälli Date: Wed, 7 Oct 2015 10:04:06 +0300 Subject: glsl: add varyings to resource list only with SSO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Varyings can be considered inputs or outputs of a program only when SSO is in use. With multi-stage programs, inputs contain only inputs for first stage and outputs contains outputs of the final shader stage. I've tested that fix works for Assault Android Cactus (demo version) and does not cause Piglit or CTS regressions in glGetProgramiv tests. 
Following ES 3.1 CTS separate shader tests that do query properties of varyings in SSO shader programs pass: ES31-CTS.program_interface_query.separate-programs-vertex ES31-CTS.program_interface_query.separate-programs-fragment Signed-off-by: Tapani Pälli Tested-by: Dieter Nützel Reviewed-by: Kenneth Graunke Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92122 --- src/glsl/linker.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 6df8d61cc44..a97b4ef0a32 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -3421,10 +3421,13 @@ build_program_resource_list(struct gl_shader_program *shProg) if (input_stage == MESA_SHADER_STAGES && output_stage == 0) return; - if (!add_packed_varyings(shProg, input_stage)) - return; - if (!add_packed_varyings(shProg, output_stage)) - return; + /* Program interface needs to expose varyings in case of SSO. */ + if (shProg->SeparateShader) { + if (!add_packed_varyings(shProg, input_stage)) + return; + if (!add_packed_varyings(shProg, output_stage)) + return; + } /* Add inputs and outputs to the resource list. 
*/ if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir, -- cgit v1.2.3 From aee28a0aa3bb77b728972d4efcdf93c1cc09c73b Mon Sep 17 00:00:00 2001 From: Tapani Pälli Date: Thu, 8 Oct 2015 09:25:16 +0300 Subject: mesa: include bad type in error string of _mesa_pack_depth_span MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Tapani Pälli Reviewed-by: Ian Romanick --- src/mesa/main/pack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index ef89e751a4c..00e31b05c99 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -1123,7 +1123,8 @@ _mesa_pack_depth_span( struct gl_context *ctx, GLuint n, GLvoid *dest, } break; default: - _mesa_problem(ctx, "bad type in _mesa_pack_depth_span"); + _mesa_problem(ctx, "bad type in _mesa_pack_depth_span (%s)", + _mesa_enum_to_string(dstType)); } free(depthCopy); -- cgit v1.2.3 From 0c2add775192f3ee0325d61964ef67f7ca3f6d4e Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Tue, 22 Sep 2015 12:53:08 +0200 Subject: i965: Fix remove_duplicate_mrf_writes so it can handle 24 MRFs in gen6 Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 1187c6765cd..2a90ed4b165 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2678,7 +2678,7 @@ fs_visitor::emit_repclear_shader() bool fs_visitor::remove_duplicate_mrf_writes() { - fs_inst *last_mrf_move[16]; + fs_inst *last_mrf_move[BRW_MAX_MRF(devinfo->gen)]; bool progress = false; /* Need to update the MRF tracking for compressed instructions. 
*/ -- cgit v1.2.3 From 36e82b137d4a77f24de0fc722c80e445b6e3375c Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Tue, 22 Sep 2015 13:01:18 +0200 Subject: i965: make pull constant loads in gen6 start at MRFs 16/17 So they do not conflict with our (un)spills (MRF 21..23) or our URB writes (MRF 1..15) Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 6 ++++-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 2a90ed4b165..b4b98109e0c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -50,6 +50,8 @@ #include "glsl/glsl_types.h" #include "program/sampler.h" +#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13) + using namespace brw; void @@ -210,7 +212,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, inst->regs_written = regs_written; if (devinfo->gen < 7) { - inst->base_mrf = 13; + inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen); inst->header_size = 1; if (devinfo->gen == 4) inst->mlen = 3; @@ -2999,7 +3001,7 @@ fs_visitor::lower_uniform_pull_constant_loads() * else does except for register spill/unspill, which generates and * uses its MRF within a single IR instruction. */ - inst->base_mrf = 14; + inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen) + 1; inst->mlen = 1; } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index ca7c01876b7..e0ccdb64543 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -27,6 +27,7 @@ #include "program/sampler.h" #define FIRST_SPILL_MRF(gen) (gen == 6 ? 21 : 13) +#define FIRST_PULL_LOAD_MRF(gen) (gen == 6 ? 
16 : 13) namespace brw { @@ -792,7 +793,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst, dst, surf_index, offset_reg); - pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1; + pull->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen) + 1; pull->mlen = 1; } -- cgit v1.2.3 From 3141906fa36839e9276cb65033857c85b39376e5 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Tue, 22 Sep 2015 13:14:52 +0200 Subject: i965: Define FIRST_SPILL_MRF and FIRST_PULL_LOAD_MRF only once and in one place That should make tracking where we do spills and pull loads a bit easier. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 -- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 2 -- src/mesa/drivers/dri/i965/brw_inst.h | 6 ++++++ src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 3 --- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b4b98109e0c..781e2d8027b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -50,8 +50,6 @@ #include "glsl/glsl_types.h" #include "program/sampler.h" -#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13) - using namespace brw; void diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 6900cee86f4..c3a037be4b1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -30,8 +30,6 @@ #include "glsl/glsl_types.h" #include "glsl/ir_optimization.h" -#define FIRST_SPILL_MRF(gen) (gen == 6 ? 
21 : 13) - using namespace brw; static void diff --git a/src/mesa/drivers/dri/i965/brw_inst.h b/src/mesa/drivers/dri/i965/brw_inst.h index c5132ba15ed..ab37b709d65 100644 --- a/src/mesa/drivers/dri/i965/brw_inst.h +++ b/src/mesa/drivers/dri/i965/brw_inst.h @@ -42,6 +42,12 @@ extern "C" { /** Maximum SEND message length */ #define BRW_MAX_MSG_LENGTH 15 +/** First MRF register used by spills */ +#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13) + +/** First MRF register used by pull loads */ +#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13) + /* brw_context.h has a forward declaration of brw_inst, so name the struct. */ typedef struct brw_inst { uint64_t data[2]; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index e0ccdb64543..7bc13fe29d6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -26,9 +26,6 @@ #include "glsl/ir_uniform.h" #include "program/sampler.h" -#define FIRST_SPILL_MRF(gen) (gen == 6 ? 21 : 13) -#define FIRST_PULL_LOAD_MRF(gen) (gen == 6 ? 16 : 13) - namespace brw { vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst, -- cgit v1.2.3 From 1efbb8151b402f76df6dbf0b4ed9c2823e3a44fd Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Wed, 23 Sep 2015 08:52:07 +0200 Subject: i965/gs/gen6: Maximum allowed size of SEND messages is 15 (4 bits) Commit d48ac9306619 addressed this for VS, but we forgot to do the same for URB writes generated by the gen6 GS. 
Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 30 ++++++++++++++++----------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index def21d80b24..59a76559103 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -273,6 +273,18 @@ gen6_gs_visitor::emit_urb_write_header(int mrf) emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, mrf), flags_data); } +static int +align_interleaved_urb_mlen(int mlen) +{ + /* URB data written (does not include the message header reg) must + * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, + * section 5.4.3.2.2: URB_INTERLEAVED. + */ + if ((mlen % 2) != 1) + mlen++; + return mlen; +} + void gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf, int last_mrf, int urb_offset) @@ -299,14 +311,7 @@ gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf, } inst->base_mrf = base_mrf; - /* URB data written (does not include the message header reg) must - * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, - * section 5.4.3.2.2: URB_INTERLEAVED. - */ - int mlen = last_mrf - base_mrf; - if ((mlen % 2) != 1) - mlen++; - inst->mlen = mlen; + inst->mlen = align_interleaved_urb_mlen(last_mrf - base_mrf); inst->offset = urb_offset; } @@ -339,9 +344,9 @@ gen6_gs_visitor::emit_thread_end() /* In the process of generating our URB write message contents, we * may need to unspill a register or load from an array. Those - * reads would use MRFs 14-15. + * reads would use MRFs 21..23 */ - int max_usable_mrf = 13; + int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen); /* Issue the FF_SYNC message and obtain the initial VUE handle. 
*/ emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_G)); @@ -416,9 +421,10 @@ gen6_gs_visitor::emit_thread_end() this->vertex_output_offset, 1u)); /* If this was max_usable_mrf, we can't fit anything more into - * this URB WRITE. + * this URB WRITE. Same if we reached the max. message length. */ - if (mrf > max_usable_mrf) { + if (mrf > max_usable_mrf || + align_interleaved_urb_mlen(mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) { slot++; break; } -- cgit v1.2.3 From 89ae41ab4c9e32db9cbef0cb01a99c8e3efbb289 Mon Sep 17 00:00:00 2001 From: Boyan Ding Date: Thu, 8 Oct 2015 15:38:15 +0800 Subject: docs/relnotes: document EGL_KHR_create_context on llvmpipe and softpipe Signed-off-by: Boyan Ding --- docs/relnotes/11.1.0.html | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html index 4c8b9d8256e..d4f30d0da62 100644 --- a/docs/relnotes/11.1.0.html +++ b/docs/relnotes/11.1.0.html @@ -50,6 +50,7 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi
  • GL_ARB_texture_barrier / GL_NV_texture_barrier on i965
  • GL_ARB_texture_query_lod on softpipe
  • +EGL_KHR_create_context on softpipe, llvmpipe
  • EGL_KHR_gl_colorspace on softpipe, llvmpipe
  • -- cgit v1.2.3 From c8031a879a3f442a4ca43243914c797615110d9b Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Sat, 3 Oct 2015 19:19:13 -0500 Subject: c11/threads: initialize timeout structure Signed-off-by: Jan Vesely Reviewed-by: Ian Romanick --- include/c11/threads_posix.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/c11/threads_posix.h b/include/c11/threads_posix.h index 3def6c41ca5..ce9853b18b3 100644 --- a/include/c11/threads_posix.h +++ b/include/c11/threads_posix.h @@ -136,8 +136,14 @@ cnd_timedwait(cnd_t *cond, mtx_t *mtx, const xtime *xt) { struct timespec abs_time; int rt; + assert(mtx != NULL); assert(cond != NULL); + assert(xt != NULL); + + abs_time.tv_sec = xt->sec; + abs_time.tv_nsec = xt->nsec; + rt = pthread_cond_timedwait(cond, mtx, &abs_time); if (rt == ETIMEDOUT) return thrd_busy; -- cgit v1.2.3 From 9c528f5dfa69fcb4068df736be441c0db19e6e88 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 5 Oct 2015 17:16:02 -0700 Subject: nir/sweep: Reparent the shader name Previously the name of the nir shader was being freed prematurely during nir_sweep. Since 756613ed35d the name was later being used to generate filenames for the optimiser debug output and these would end up with garbage from the dangling pointer. Co-authored-by: Neil Roberts Reviewed-by: Matt Turner --- src/glsl/nir/nir_sweep.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/glsl/nir/nir_sweep.c b/src/glsl/nir/nir_sweep.c index d3549756a63..b6ce43b5224 100644 --- a/src/glsl/nir/nir_sweep.c +++ b/src/glsl/nir/nir_sweep.c @@ -154,6 +154,8 @@ nir_sweep(nir_shader *nir) /* First, move ownership of all the memory to a temporary context; assume dead. */ ralloc_adopt(rubbish, nir); + ralloc_steal(nir, (char *)nir->info.name); + /* Variables and registers are not dead. Steal them back. 
*/ steal_list(nir, nir_variable, &nir->uniforms); steal_list(nir, nir_variable, &nir->inputs); -- cgit v1.2.3 From 62741ff05272f5879fba753121342e27afa340f6 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 8 Oct 2015 15:50:12 +0100 Subject: gallium/ddebug: automake: sort sources alphabetically Signed-off-by: Emil Velikov --- src/gallium/drivers/ddebug/Makefile.sources | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/ddebug/Makefile.sources b/src/gallium/drivers/ddebug/Makefile.sources index 780edd8aa00..921703e90f3 100644 --- a/src/gallium/drivers/ddebug/Makefile.sources +++ b/src/gallium/drivers/ddebug/Makefile.sources @@ -1,6 +1,6 @@ C_SOURCES := \ - dd_pipe.h \ - dd_public.h \ dd_context.c \ dd_draw.c \ + dd_pipe.h \ + dd_public.h \ dd_screen.c -- cgit v1.2.3 From 1fda56cdb2e8d982ccb063e63347103c03aeeadb Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 8 Oct 2015 15:50:54 +0100 Subject: gallium/ddebug: add missing dd_util.h to sources list Signed-off-by: Emil Velikov --- src/gallium/drivers/ddebug/Makefile.sources | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/ddebug/Makefile.sources b/src/gallium/drivers/ddebug/Makefile.sources index 921703e90f3..1bd38274df8 100644 --- a/src/gallium/drivers/ddebug/Makefile.sources +++ b/src/gallium/drivers/ddebug/Makefile.sources @@ -3,4 +3,5 @@ C_SOURCES := \ dd_draw.c \ dd_pipe.h \ dd_public.h \ - dd_screen.c + dd_screen.c \ + dd_util.h -- cgit v1.2.3 From ba71d581aeb96c4626500eb5b19f3bef2f40d586 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 5 Oct 2015 13:58:05 -0700 Subject: i965: Move brw_dump_ir() out of brw_*_emit() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We move these calls one level up into the codegen functions. 
Reviewed-by: Topi Pohjolainen Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/brw_cs.c | 3 +++ src/mesa/drivers/dri/i965/brw_fs.cpp | 13 ------------- src/mesa/drivers/dri/i965/brw_gs.c | 3 +++ src/mesa/drivers/dri/i965/brw_vec4.cpp | 7 ------- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 3 --- src/mesa/drivers/dri/i965/brw_vs.c | 3 +++ src/mesa/drivers/dri/i965/brw_wm.c | 3 +++ 7 files changed, 12 insertions(+), 23 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index 6b64030a868..34680ee6e3a 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -98,6 +98,9 @@ brw_codegen_cs_prog(struct brw_context *brw, start_time = get_time(); } + if (unlikely(INTEL_DEBUG & DEBUG_CS)) + brw_dump_ir("compute", prog, &cs->base, &cp->program.Base); + program = brw_cs_emit(brw, mem_ctx, key, &prog_data, &cp->program, prog, &program_size); if (program == NULL) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 781e2d8027b..7c401535f88 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5100,13 +5100,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct gl_shader_program *prog, unsigned *final_assembly_size) { - struct brw_shader *shader = NULL; - if (prog) - shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) - brw_dump_ir("fragment", prog, &shader->base, &fp->Base); - int st_index8 = -1, st_index16 = -1; if (INTEL_DEBUG & DEBUG_SHADER_TIME) { st_index8 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS8); @@ -5224,12 +5217,6 @@ brw_cs_emit(struct brw_context *brw, struct gl_shader_program *prog, unsigned *final_assembly_size) { - struct brw_shader *shader = - (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE]; - - if (unlikely(INTEL_DEBUG & DEBUG_CS)) - brw_dump_ir("compute", prog, &shader->base, 
&cp->Base); - prog_data->local_size[0] = cp->LocalSize[0]; prog_data->local_size[1] = cp->LocalSize[1]; prog_data->local_size[2] = cp->LocalSize[2]; diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index f6b9874f78a..26c91e4c76a 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -291,6 +291,9 @@ brw_codegen_gs_prog(struct brw_context *brw, */ c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2; + if (unlikely(INTEL_DEBUG & DEBUG_GS)) + brw_dump_ir("geometry", prog, gs, NULL); + void *mem_ctx = ralloc_context(NULL); unsigned program_size; const unsigned *program = diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 76ce0c46198..4b4a216d5f5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1947,17 +1947,10 @@ brw_vs_emit(struct brw_context *brw, { const unsigned *assembly = NULL; - struct brw_shader *shader = NULL; - if (prog) - shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; - int st_index = -1; if (INTEL_DEBUG & DEBUG_SHADER_TIME) st_index = brw_get_shader_time_index(brw, prog, &vp->Base, ST_VS); - if (unlikely(INTEL_DEBUG & DEBUG_VS)) - brw_dump_ir("vertex", prog, &shader->base, &vp->Base); - if (brw->intelScreen->compiler->scalar_vs) { prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index f6967a74d4e..36ab25d1259 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -622,9 +622,6 @@ brw_gs_emit(struct brw_context *brw, { struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; - if (unlikely(INTEL_DEBUG & DEBUG_GS)) - brw_dump_ir("geometry", prog, shader, NULL); - int st_index = -1; if (INTEL_DEBUG & DEBUG_SHADER_TIME) st_index = 
brw_get_shader_time_index(brw, prog, NULL, ST_GS); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 3c6ee0a7a03..63d2e3f0f03 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -195,6 +195,9 @@ brw_codegen_vs_prog(struct brw_context *brw, start_time = get_time(); } + if (unlikely(INTEL_DEBUG & DEBUG_VS)) + brw_dump_ir("vertex", prog, &vs->base, &vp->program.Base); + /* Emit GEN4 code. */ program = brw_vs_emit(brw, mem_ctx, key, &prog_data, diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 98920463503..81a1d90bb60 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -221,6 +221,9 @@ brw_codegen_wm_prog(struct brw_context *brw, start_time = get_time(); } + if (unlikely(INTEL_DEBUG & DEBUG_WM)) + brw_dump_ir("fragment", prog, &fs->base, &fp->program.Base); + program = brw_wm_fs_emit(brw, mem_ctx, key, &prog_data, &fp->program, prog, &program_size); if (program == NULL) { -- cgit v1.2.3 From 04158fb0f6e1148fdf155f78147cfc58b5d2b82c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 5 Oct 2015 14:13:29 -0700 Subject: util: Move DRI parse_debug_string() to util MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to use intel_debug.c in code that doesn't link to dri common. v2: Remove unnecessary stddef.h include (Topi), use util/debug.h in all DRI driver and remove driParseDebugString() (Iago). 
Reviewed-by: Topi Pohjolainen Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/common/utils.c | 30 ---------------- src/mesa/drivers/dri/common/utils.h | 8 ----- src/mesa/drivers/dri/i915/intel_context.c | 5 +-- src/mesa/drivers/dri/i965/intel_debug.c | 5 +-- src/mesa/drivers/dri/radeon/radeon_context.c | 4 +-- src/mesa/drivers/dri/radeon/radeon_debug.c | 6 ++-- src/util/Makefile.sources | 2 ++ src/util/debug.c | 53 ++++++++++++++++++++++++++++ src/util/debug.h | 46 ++++++++++++++++++++++++ 9 files changed, 112 insertions(+), 47 deletions(-) create mode 100644 src/util/debug.c create mode 100644 src/util/debug.h diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index 1246bec6e02..ae8fcabc14e 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -41,36 +41,6 @@ #include "utils.h" #include "dri_util.h" - -uint64_t -driParseDebugString(const char *debug, - const struct dri_debug_control *control) -{ - uint64_t flag = 0; - - if (debug != NULL) { - for (; control->string != NULL; control++) { - if (!strcmp(debug, "all")) { - flag |= control->flag; - - } else { - const char *s = debug; - unsigned n; - - for (; n = strcspn(s, ", "), *s; s += MAX2(1, n)) { - if (strlen(control->string) == n && - !strncmp(control->string, s, n)) - flag |= control->flag; - } - } - } - } - - return flag; -} - - - /** * Create the \c GL_RENDERER string for DRI drivers. 
* diff --git a/src/mesa/drivers/dri/common/utils.h b/src/mesa/drivers/dri/common/utils.h index 3760c38fcaf..f6b8d7c3a21 100644 --- a/src/mesa/drivers/dri/common/utils.h +++ b/src/mesa/drivers/dri/common/utils.h @@ -32,14 +32,6 @@ #include #include "main/context.h" -struct dri_debug_control { - const char * string; - uint64_t flag; -}; - -extern uint64_t driParseDebugString( const char * debug, - const struct dri_debug_control * control ); - extern unsigned driGetRendererString( char * buffer, const char * hardware_name, GLuint agp_mode ); diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c index 644bede9d47..3f429f25d10 100644 --- a/src/mesa/drivers/dri/i915/intel_context.c +++ b/src/mesa/drivers/dri/i915/intel_context.c @@ -56,6 +56,7 @@ #include "intel_mipmap_tree.h" #include "utils.h" +#include "util/debug.h" #include "util/ralloc.h" int INTEL_DEBUG = (0); @@ -290,7 +291,7 @@ intel_viewport(struct gl_context *ctx) intelCalcViewport(ctx); } -static const struct dri_debug_control debug_control[] = { +static const struct debug_control debug_control[] = { { "tex", DEBUG_TEXTURE}, { "state", DEBUG_STATE}, { "blit", DEBUG_BLIT}, @@ -512,7 +513,7 @@ intelInitContext(struct intel_context *intel, intelInitExtensions(ctx); - INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control); + INTEL_DEBUG = parse_debug_string(getenv("INTEL_DEBUG"), debug_control); if (INTEL_DEBUG & DEBUG_BUFMGR) dri_bufmgr_set_debug(intel->bufmgr, true); if (INTEL_DEBUG & DEBUG_PERF) diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index 5a9c9533fde..31201892145 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -33,10 +33,11 @@ #include "intel_debug.h" #include "utils.h" #include "util/u_atomic.h" /* for p_atomic_cmpxchg */ +#include "util/debug.h" uint64_t INTEL_DEBUG = 0; -static const struct dri_debug_control debug_control[] = { +static 
const struct debug_control debug_control[] = { { "tex", DEBUG_TEXTURE}, { "state", DEBUG_STATE}, { "blit", DEBUG_BLIT}, @@ -93,7 +94,7 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage) void brw_process_intel_debug_variable(struct intel_screen *screen) { - uint64_t intel_debug = driParseDebugString(getenv("INTEL_DEBUG"), debug_control); + uint64_t intel_debug = parse_debug_string(getenv("INTEL_DEBUG"), debug_control); (void) p_atomic_cmpxchg(&INTEL_DEBUG, 0, intel_debug); if (INTEL_DEBUG & DEBUG_BUFMGR) diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index a9e2ab563d3..5e15b46fb32 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -341,8 +341,8 @@ r100CreateContext( gl_api api, #if DO_DEBUG - RADEON_DEBUG = driParseDebugString( getenv( "RADEON_DEBUG" ), - debug_control ); + RADEON_DEBUG = parse_debug_string( getenv( "RADEON_DEBUG" ), + debug_control ); #endif tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode"); diff --git a/src/mesa/drivers/dri/radeon/radeon_debug.c b/src/mesa/drivers/dri/radeon/radeon_debug.c index 7ddba1ae85f..383a5df6749 100644 --- a/src/mesa/drivers/dri/radeon/radeon_debug.c +++ b/src/mesa/drivers/dri/radeon/radeon_debug.c @@ -27,7 +27,7 @@ * Pauli Nieminen */ -#include "utils.h" +#include "util/debug.h" #include "radeon_common_context.h" #include "radeon_debug.h" @@ -35,7 +35,7 @@ #include #include -static const struct dri_debug_control debug_control[] = { +static const struct debug_control debug_control[] = { {"fall", RADEON_FALLBACKS}, {"tex", RADEON_TEXTURE}, {"ioctl", RADEON_IOCTL}, @@ -61,7 +61,7 @@ radeon_debug_type_t radeon_enabled_debug_types; void radeon_init_debug(void) { - radeon_enabled_debug_types = driParseDebugString(getenv("RADEON_DEBUG"), debug_control); + radeon_enabled_debug_types = parse_debug_string(getenv("RADEON_DEBUG"), debug_control); radeon_enabled_debug_types |= RADEON_GENERAL; 
} diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources index ef38b5ac7d1..e45431d1de8 100644 --- a/src/util/Makefile.sources +++ b/src/util/Makefile.sources @@ -1,5 +1,7 @@ MESA_UTIL_FILES := \ bitset.h \ + debug.c \ + debug.h \ format_srgb.h \ hash_table.c \ hash_table.h \ diff --git a/src/util/debug.c b/src/util/debug.c new file mode 100644 index 00000000000..3729ce85670 --- /dev/null +++ b/src/util/debug.c @@ -0,0 +1,53 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <string.h> +#include "main/macros.h" +#include "debug.h" + +uint64_t +parse_debug_string(const char *debug, + const struct debug_control *control) +{ + uint64_t flag = 0; + + if (debug != NULL) { + for (; control->string != NULL; control++) { + if (!strcmp(debug, "all")) { + flag |= control->flag; + + } else { + const char *s = debug; + unsigned n; + + for (; n = strcspn(s, ", "), *s; s += MAX2(1, n)) { + if (strlen(control->string) == n && + !strncmp(control->string, s, n)) + flag |= control->flag; + } + } + } + } + + return flag; +} diff --git a/src/util/debug.h b/src/util/debug.h new file mode 100644 index 00000000000..801736aafff --- /dev/null +++ b/src/util/debug.h @@ -0,0 +1,46 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef _DEBUG_H +#define _DEBUG_H + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct debug_control { + const char * string; + uint64_t flag; +}; + +uint64_t +parse_debug_string(const char *debug, + const struct debug_control *control); + +#ifdef __cplusplus +} /* extern C */ +#endif + +#endif /* _DEBUG_H */ -- cgit v1.2.3 From 99ca2256c1b8414efd27864bd56f6c95cac55731 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 6 Oct 2015 16:19:04 -0700 Subject: i965: Configure bufmgr debug options from intel_screen.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need the debug flag parsing and INTEL_DEBUG in the compiler, but we don't want the dependency on bufmgr (libdrm_intel) in there. Move to intel_screen.c. There are now only two lines left in brw_process_intel_debug_variable(), but we keep it in intel_debug.h to avoid having to expose 'debug_control' as a global variable. Reviewed-by: Topi Pohjolainen Reviewed-by: Iago Toral Quiroga Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/intel_debug.c | 14 +------------- src/mesa/drivers/dri/i965/intel_debug.h | 4 +--- src/mesa/drivers/dri/i965/intel_screen.c | 14 +++++++++++++- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index 31201892145..f7c02c8a38d 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -92,22 +92,10 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage) } void -brw_process_intel_debug_variable(struct intel_screen *screen) +brw_process_intel_debug_variable(void) { uint64_t intel_debug = parse_debug_string(getenv("INTEL_DEBUG"), debug_control); (void) p_atomic_cmpxchg(&INTEL_DEBUG, 0, intel_debug); - - if (INTEL_DEBUG & DEBUG_BUFMGR) - dri_bufmgr_set_debug(screen->bufmgr, true); - - if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && 
screen->devinfo->gen < 7) { - fprintf(stderr, - "shader_time debugging requires gen7 (Ivybridge) or better.\n"); - INTEL_DEBUG &= ~DEBUG_SHADER_TIME; - } - - if (INTEL_DEBUG & DEBUG_AUB) - drm_intel_bufmgr_gem_set_aub_dump(screen->bufmgr, true); } /** diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h index b7d0c823fa8..0a6e1b90b98 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.h +++ b/src/mesa/drivers/dri/i965/intel_debug.h @@ -115,8 +115,6 @@ extern uint64_t INTEL_DEBUG; extern uint64_t intel_debug_flag_for_shader_stage(gl_shader_stage stage); -struct intel_screen; - -extern void brw_process_intel_debug_variable(struct intel_screen *); +extern void brw_process_intel_debug_variable(void); extern bool brw_env_var_as_boolean(const char *var_name, bool default_value); diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 17838350cda..590c45d93ea 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1421,7 +1421,19 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) if (!intelScreen->devinfo) return false; - brw_process_intel_debug_variable(intelScreen); + brw_process_intel_debug_variable(); + + if (INTEL_DEBUG & DEBUG_BUFMGR) + dri_bufmgr_set_debug(intelScreen->bufmgr, true); + + if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && intelScreen->devinfo->gen < 7) { + fprintf(stderr, + "shader_time debugging requires gen7 (Ivybridge) or better.\n"); + INTEL_DEBUG &= ~DEBUG_SHADER_TIME; + } + + if (INTEL_DEBUG & DEBUG_AUB) + drm_intel_bufmgr_gem_set_aub_dump(intelScreen->bufmgr, true); intelScreen->hw_must_use_separate_stencil = intelScreen->devinfo->gen >= 7; -- cgit v1.2.3 From 4f33700f5ab6e69072318622651ea81639845ae9 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 5 Oct 2015 14:22:23 -0700 Subject: i965: Move brw_link_shader() and friends to new file brw_link.cpp MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit We want to use the rest of brw_shader.cpp with the rest of the compiler without pulling in the GLSL linking code. Reviewed-by: Topi Pohjolainen Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_link.cpp | 280 +++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_shader.cpp | 250 +------------------------- src/mesa/drivers/dri/i965/brw_shader.h | 2 + 4 files changed, 284 insertions(+), 249 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_link.cpp diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 6f97f735add..7e1586ffbbd 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -73,6 +73,7 @@ i965_FILES = \ brw_ir_allocator.h \ brw_ir_fs.h \ brw_ir_vec4.h \ + brw_link.cpp \ brw_lower_texture_gradients.cpp \ brw_lower_unnormalized_offset.cpp \ brw_meta_fast_clear.c \ diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp new file mode 100644 index 00000000000..fc9bee43d80 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -0,0 +1,280 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "main/macros.h" +#include "brw_context.h" +#include "brw_vs.h" +#include "brw_gs.h" +#include "brw_fs.h" +#include "brw_cfg.h" +#include "brw_nir.h" +#include "glsl/ir_optimization.h" +#include "glsl/glsl_parser_extras.h" +#include "main/shaderapi.h" + +/** + * Performs a compile of the shader stages even when we don't know + * what non-orthogonal state will be set, in the hope that it reflects + * the eventual NOS used, and thus allows us to produce link failures. + */ +static bool +brw_shader_precompile(struct gl_context *ctx, + struct gl_shader_program *sh_prog) +{ + struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX]; + struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; + struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; + struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE]; + + if (fs && !brw_fs_precompile(ctx, sh_prog, fs->Program)) + return false; + + if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program)) + return false; + + if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program)) + return false; + + if (cs && !brw_cs_precompile(ctx, sh_prog, cs->Program)) + return false; + + return true; +} + +static void +brw_lower_packing_builtins(struct brw_context *brw, + gl_shader_stage shader_type, + exec_list *ir) +{ + int ops = LOWER_PACK_SNORM_2x16 + | LOWER_UNPACK_SNORM_2x16 + | LOWER_PACK_UNORM_2x16 + | LOWER_UNPACK_UNORM_2x16; + + if (is_scalar_shader_stage(brw->intelScreen->compiler, 
shader_type)) { + ops |= LOWER_UNPACK_UNORM_4x8 + | LOWER_UNPACK_SNORM_4x8 + | LOWER_PACK_UNORM_4x8 + | LOWER_PACK_SNORM_4x8; + } + + if (brw->gen >= 7) { + /* Gen7 introduced the f32to16 and f16to32 instructions, which can be + * used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no + * lowering is needed. For SOA code, the Half2x16 ops must be + * scalarized. + */ + if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) { + ops |= LOWER_PACK_HALF_2x16_TO_SPLIT + | LOWER_UNPACK_HALF_2x16_TO_SPLIT; + } + } else { + ops |= LOWER_PACK_HALF_2x16 + | LOWER_UNPACK_HALF_2x16; + } + + lower_packing_builtins(ir, ops); +} + +static void +process_glsl_ir(gl_shader_stage stage, + struct brw_context *brw, + struct gl_shader_program *shader_prog, + struct gl_shader *shader) +{ + struct gl_context *ctx = &brw->ctx; + const struct gl_shader_compiler_options *options = + &ctx->Const.ShaderCompilerOptions[shader->Stage]; + + /* Temporary memory context for any new IR. */ + void *mem_ctx = ralloc_context(NULL); + + ralloc_adopt(mem_ctx, shader->ir); + + /* lower_packing_builtins() inserts arithmetic instructions, so it + * must precede lower_instructions(). + */ + brw_lower_packing_builtins(brw, shader->Stage, shader->ir); + do_mat_op_to_vec(shader->ir); + const int bitfield_insert = brw->gen >= 7 ? BITFIELD_INSERT_TO_BFM_BFI : 0; + lower_instructions(shader->ir, + MOD_TO_FLOOR | + DIV_TO_MUL_RCP | + SUB_TO_ADD_NEG | + EXP_TO_EXP2 | + LOG_TO_LOG2 | + bitfield_insert | + LDEXP_TO_ARITH | + CARRY_TO_ARITH | + BORROW_TO_ARITH); + + /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, + * if-statements need to be flattened. 
+ */ + if (brw->gen < 6) + lower_if_to_cond_assign(shader->ir, 16); + + do_lower_texture_projection(shader->ir); + brw_lower_texture_gradients(brw, shader->ir); + do_vec_index_to_cond_assign(shader->ir); + lower_vector_insert(shader->ir, true); + lower_offset_arrays(shader->ir); + brw_do_lower_unnormalized_offset(shader->ir); + lower_noise(shader->ir); + lower_quadop_vector(shader->ir, false); + + bool lowered_variable_indexing = + lower_variable_index_to_cond_assign((gl_shader_stage)stage, + shader->ir, + options->EmitNoIndirectInput, + options->EmitNoIndirectOutput, + options->EmitNoIndirectTemp, + options->EmitNoIndirectUniform); + + if (unlikely(brw->perf_debug && lowered_variable_indexing)) { + perf_debug("Unsupported form of variable indexing in %s; falling " + "back to very inefficient code generation\n", + _mesa_shader_stage_to_abbrev(shader->Stage)); + } + + lower_ubo_reference(shader, shader->ir); + + bool progress; + do { + progress = false; + + if (is_scalar_shader_stage(brw->intelScreen->compiler, shader->Stage)) { + brw_do_channel_expressions(shader->ir); + brw_do_vector_splitting(shader->ir); + } + + progress = do_lower_jumps(shader->ir, true, true, + true, /* main return */ + false, /* continue */ + false /* loops */ + ) || progress; + + progress = do_common_optimization(shader->ir, true, true, + options, ctx->Const.NativeIntegers) || progress; + } while (progress); + + validate_ir_tree(shader->ir); + + /* Now that we've finished altering the linked IR, reparent any live IR back + * to the permanent memory context, and free the temporary one (discarding any + * junk we optimized away). 
+ */ + reparent_ir(shader->ir, shader->ir); + ralloc_free(mem_ctx); + + if (ctx->_Shader->Flags & GLSL_DUMP) { + fprintf(stderr, "\n"); + fprintf(stderr, "GLSL IR for linked %s program %d:\n", + _mesa_shader_stage_to_string(shader->Stage), + shader_prog->Name); + _mesa_print_ir(stderr, shader->ir, NULL); + fprintf(stderr, "\n"); + } +} + +GLboolean +brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) +{ + struct brw_context *brw = brw_context(ctx); + const struct brw_compiler *compiler = brw->intelScreen->compiler; + unsigned int stage; + + for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) { + struct gl_shader *shader = shProg->_LinkedShaders[stage]; + if (!shader) + continue; + + struct gl_program *prog = + ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage), + shader->Name); + if (!prog) + return false; + prog->Parameters = _mesa_new_parameter_list(); + + _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog); + + process_glsl_ir((gl_shader_stage) stage, brw, shProg, shader); + + /* Make a pass over the IR to add state references for any built-in + * uniforms that are used. This has to be done now (during linking). + * Code generation doesn't happen until the first time this shader is + * used for rendering. Waiting until then to generate the parameters is + * too late. At that point, the values for the built-in uniforms won't + * get sent to the shader. 
+ */ + foreach_in_list(ir_instruction, node, shader->ir) { + ir_variable *var = node->as_variable(); + + if ((var == NULL) || (var->data.mode != ir_var_uniform) + || (strncmp(var->name, "gl_", 3) != 0)) + continue; + + const ir_state_slot *const slots = var->get_state_slots(); + assert(slots != NULL); + + for (unsigned int i = 0; i < var->get_num_state_slots(); i++) { + _mesa_add_state_reference(prog->Parameters, + (gl_state_index *) slots[i].tokens); + } + } + + do_set_program_inouts(shader->ir, prog, shader->Stage); + + prog->SamplersUsed = shader->active_samplers; + prog->ShadowSamplers = shader->shadow_samplers; + _mesa_update_shader_textures_used(shProg, prog); + + _mesa_reference_program(ctx, &shader->Program, prog); + + brw_add_texrect_params(prog); + + prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage, + is_scalar_shader_stage(compiler, stage)); + + _mesa_reference_program(ctx, &prog, NULL); + } + + if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) { + for (unsigned i = 0; i < shProg->NumShaders; i++) { + const struct gl_shader *sh = shProg->Shaders[i]; + if (!sh) + continue; + + fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n", + _mesa_shader_stage_to_string(sh->Stage), + i, shProg->Name); + fprintf(stderr, "%s", sh->Source); + fprintf(stderr, "\n"); + } + } + + if (brw->precompile && !brw_shader_precompile(ctx, shProg)) + return false; + + return true; +} diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 7bc080b082e..9ad7f3c3e9a 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -72,7 +72,7 @@ shader_perf_log_mesa(void *data, const char *fmt, ...) 
va_end(args); } -static bool +bool is_scalar_shader_stage(const struct brw_compiler *compiler, int stage) { switch (stage) { @@ -164,254 +164,6 @@ brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) return &shader->base; } -/** - * Performs a compile of the shader stages even when we don't know - * what non-orthogonal state will be set, in the hope that it reflects - * the eventual NOS used, and thus allows us to produce link failures. - */ -static bool -brw_shader_precompile(struct gl_context *ctx, - struct gl_shader_program *sh_prog) -{ - struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX]; - struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; - struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE]; - - if (fs && !brw_fs_precompile(ctx, sh_prog, fs->Program)) - return false; - - if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program)) - return false; - - if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program)) - return false; - - if (cs && !brw_cs_precompile(ctx, sh_prog, cs->Program)) - return false; - - return true; -} - -static void -brw_lower_packing_builtins(struct brw_context *brw, - gl_shader_stage shader_type, - exec_list *ir) -{ - int ops = LOWER_PACK_SNORM_2x16 - | LOWER_UNPACK_SNORM_2x16 - | LOWER_PACK_UNORM_2x16 - | LOWER_UNPACK_UNORM_2x16; - - if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) { - ops |= LOWER_UNPACK_UNORM_4x8 - | LOWER_UNPACK_SNORM_4x8 - | LOWER_PACK_UNORM_4x8 - | LOWER_PACK_SNORM_4x8; - } - - if (brw->gen >= 7) { - /* Gen7 introduced the f32to16 and f16to32 instructions, which can be - * used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no - * lowering is needed. For SOA code, the Half2x16 ops must be - * scalarized. 
- */ - if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) { - ops |= LOWER_PACK_HALF_2x16_TO_SPLIT - | LOWER_UNPACK_HALF_2x16_TO_SPLIT; - } - } else { - ops |= LOWER_PACK_HALF_2x16 - | LOWER_UNPACK_HALF_2x16; - } - - lower_packing_builtins(ir, ops); -} - -static void -process_glsl_ir(gl_shader_stage stage, - struct brw_context *brw, - struct gl_shader_program *shader_prog, - struct gl_shader *shader) -{ - struct gl_context *ctx = &brw->ctx; - const struct gl_shader_compiler_options *options = - &ctx->Const.ShaderCompilerOptions[shader->Stage]; - - /* Temporary memory context for any new IR. */ - void *mem_ctx = ralloc_context(NULL); - - ralloc_adopt(mem_ctx, shader->ir); - - /* lower_packing_builtins() inserts arithmetic instructions, so it - * must precede lower_instructions(). - */ - brw_lower_packing_builtins(brw, shader->Stage, shader->ir); - do_mat_op_to_vec(shader->ir); - const int bitfield_insert = brw->gen >= 7 ? BITFIELD_INSERT_TO_BFM_BFI : 0; - lower_instructions(shader->ir, - MOD_TO_FLOOR | - DIV_TO_MUL_RCP | - SUB_TO_ADD_NEG | - EXP_TO_EXP2 | - LOG_TO_LOG2 | - bitfield_insert | - LDEXP_TO_ARITH | - CARRY_TO_ARITH | - BORROW_TO_ARITH); - - /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, - * if-statements need to be flattened. 
- */ - if (brw->gen < 6) - lower_if_to_cond_assign(shader->ir, 16); - - do_lower_texture_projection(shader->ir); - brw_lower_texture_gradients(brw, shader->ir); - do_vec_index_to_cond_assign(shader->ir); - lower_vector_insert(shader->ir, true); - lower_offset_arrays(shader->ir); - brw_do_lower_unnormalized_offset(shader->ir); - lower_noise(shader->ir); - lower_quadop_vector(shader->ir, false); - - bool lowered_variable_indexing = - lower_variable_index_to_cond_assign((gl_shader_stage)stage, - shader->ir, - options->EmitNoIndirectInput, - options->EmitNoIndirectOutput, - options->EmitNoIndirectTemp, - options->EmitNoIndirectUniform); - - if (unlikely(brw->perf_debug && lowered_variable_indexing)) { - perf_debug("Unsupported form of variable indexing in %s; falling " - "back to very inefficient code generation\n", - _mesa_shader_stage_to_abbrev(shader->Stage)); - } - - lower_ubo_reference(shader, shader->ir); - - bool progress; - do { - progress = false; - - if (is_scalar_shader_stage(brw->intelScreen->compiler, shader->Stage)) { - brw_do_channel_expressions(shader->ir); - brw_do_vector_splitting(shader->ir); - } - - progress = do_lower_jumps(shader->ir, true, true, - true, /* main return */ - false, /* continue */ - false /* loops */ - ) || progress; - - progress = do_common_optimization(shader->ir, true, true, - options, ctx->Const.NativeIntegers) || progress; - } while (progress); - - validate_ir_tree(shader->ir); - - /* Now that we've finished altering the linked IR, reparent any live IR back - * to the permanent memory context, and free the temporary one (discarding any - * junk we optimized away). 
- */ - reparent_ir(shader->ir, shader->ir); - ralloc_free(mem_ctx); - - if (ctx->_Shader->Flags & GLSL_DUMP) { - fprintf(stderr, "\n"); - fprintf(stderr, "GLSL IR for linked %s program %d:\n", - _mesa_shader_stage_to_string(shader->Stage), - shader_prog->Name); - _mesa_print_ir(stderr, shader->ir, NULL); - fprintf(stderr, "\n"); - } -} - -GLboolean -brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) -{ - struct brw_context *brw = brw_context(ctx); - const struct brw_compiler *compiler = brw->intelScreen->compiler; - unsigned int stage; - - for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) { - struct gl_shader *shader = shProg->_LinkedShaders[stage]; - if (!shader) - continue; - - struct gl_program *prog = - ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage), - shader->Name); - if (!prog) - return false; - prog->Parameters = _mesa_new_parameter_list(); - - _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog); - - process_glsl_ir((gl_shader_stage) stage, brw, shProg, shader); - - /* Make a pass over the IR to add state references for any built-in - * uniforms that are used. This has to be done now (during linking). - * Code generation doesn't happen until the first time this shader is - * used for rendering. Waiting until then to generate the parameters is - * too late. At that point, the values for the built-in uniforms won't - * get sent to the shader. 
- */ - foreach_in_list(ir_instruction, node, shader->ir) { - ir_variable *var = node->as_variable(); - - if ((var == NULL) || (var->data.mode != ir_var_uniform) - || (strncmp(var->name, "gl_", 3) != 0)) - continue; - - const ir_state_slot *const slots = var->get_state_slots(); - assert(slots != NULL); - - for (unsigned int i = 0; i < var->get_num_state_slots(); i++) { - _mesa_add_state_reference(prog->Parameters, - (gl_state_index *) slots[i].tokens); - } - } - - do_set_program_inouts(shader->ir, prog, shader->Stage); - - prog->SamplersUsed = shader->active_samplers; - prog->ShadowSamplers = shader->shadow_samplers; - _mesa_update_shader_textures_used(shProg, prog); - - _mesa_reference_program(ctx, &shader->Program, prog); - - brw_add_texrect_params(prog); - - prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage, - is_scalar_shader_stage(compiler, stage)); - - _mesa_reference_program(ctx, &prog, NULL); - } - - if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) { - for (unsigned i = 0; i < shProg->NumShaders; i++) { - const struct gl_shader *sh = shProg->Shaders[i]; - if (!sh) - continue; - - fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n", - _mesa_shader_stage_to_string(sh->Stage), - i, shProg->Name); - fprintf(stderr, "%s", sh->Source); - fprintf(stderr, "\n"); - } - } - - if (brw->precompile && !brw_shader_precompile(ctx, shProg)) - return false; - - return true; -} - - enum brw_reg_type brw_type_for_base_type(const struct glsl_type *type) { diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index b6c070ef4c7..ad2de5eae2d 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -318,6 +318,8 @@ bool brw_cs_precompile(struct gl_context *ctx, int type_size_scalar(const struct glsl_type *type); int type_size_vec4(const struct glsl_type *type); +bool is_scalar_shader_stage(const struct brw_compiler *compiler, int stage); + #ifdef 
__cplusplus } #endif -- cgit v1.2.3 From 469d0e449b78ad68e199dbe60e900487255a5d5d Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 5 Oct 2015 22:07:58 -0700 Subject: i965/cs: Split out helper for building local id payload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The initial motivation for this patch was to avoid calling brw_cs_prog_local_id_payload_dwords() in gen7_cs_state.c from the compiler. This commit ends up refactoring things a bit more so as to split out the logic to build the local id payload to brw_fs.cpp. This moves the payload building closer to the compiler code that uses the payload layout and makes it available to other users of the compiler. Reviewed-by: Topi Pohjolainen Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_cs.h | 5 +- src/mesa/drivers/dri/i965/brw_fs.cpp | 69 ++++++++++++++++++++++++-- src/mesa/drivers/dri/i965/gen7_cs_state.c | 80 ++++--------------------------- 4 files changed, 77 insertions(+), 78 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 0a29a692016..1869f284331 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -484,6 +484,7 @@ struct brw_cs_prog_data { unsigned simd_size; bool uses_barrier; bool uses_num_work_groups; + unsigned local_invocation_id_regs; struct { /** @{ diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h index 0c0ed2bc909..c07eb6ca6ee 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.h +++ b/src/mesa/drivers/dri/i965/brw_cs.h @@ -48,8 +48,9 @@ brw_cs_emit(struct brw_context *brw, struct gl_shader_program *prog, unsigned *final_assembly_size); -unsigned -brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width); +void +brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data, + void *buffer, uint32_t 
threads, uint32_t stride); #ifdef __cplusplus } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7c401535f88..6ce15703340 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -4718,20 +4718,43 @@ fs_visitor::setup_vs_payload() payload.num_regs = 2; } +/** + * We are building the local ID push constant data using the simplest possible + * method. We simply push the local IDs directly as they should appear in the + * registers for the uvec3 gl_LocalInvocationID variable. + * + * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6 + * registers worth of push constant space. + * + * Note: Any updates to brw_cs_prog_local_id_payload_dwords, + * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need + * to coordinated. + * + * FINISHME: There are a few easy optimizations to consider. + * + * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is + * no need for using push constant space for that dimension. + * + * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can + * easily use 16-bit words rather than 32-bit dwords in the push constant + * data. + * + * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for + * conveying the data, and thereby reduce push constant usage. 
+ * + */ void fs_visitor::setup_cs_payload() { assert(devinfo->gen >= 7); + brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data; payload.num_regs = 1; if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) { - const unsigned local_id_dwords = - brw_cs_prog_local_id_payload_dwords(dispatch_width); - assert((local_id_dwords & 0x7) == 0); - const unsigned local_id_regs = local_id_dwords / 8; + prog_data->local_invocation_id_regs = dispatch_width * 3 / 8; payload.local_invocation_id_reg = payload.num_regs; - payload.num_regs += local_id_regs; + payload.num_regs += prog_data->local_invocation_id_regs; } } @@ -5171,6 +5194,42 @@ brw_wm_fs_emit(struct brw_context *brw, return g.get_assembly(final_assembly_size); } +void +brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data, + void *buffer, uint32_t threads, uint32_t stride) +{ + if (prog_data->local_invocation_id_regs == 0) + return; + + /* 'stride' should be an integer number of registers, that is, a multiple + * of 32 bytes. 
+ */ + assert(stride % 32 == 0); + + unsigned x = 0, y = 0, z = 0; + for (unsigned t = 0; t < threads; t++) { + uint32_t *param = (uint32_t *) buffer + stride * t / 4; + + for (unsigned i = 0; i < prog_data->simd_size; i++) { + param[0 * prog_data->simd_size + i] = x; + param[1 * prog_data->simd_size + i] = y; + param[2 * prog_data->simd_size + i] = z; + + x++; + if (x == prog_data->local_size[0]) { + x = 0; + y++; + if (y == prog_data->local_size[1]) { + y = 0; + z++; + if (z == prog_data->local_size[2]) + z = 0; + } + } + } + } +} + fs_reg * fs_visitor::emit_cs_local_invocation_id_setup() { diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c index 5edc4fc9842..6aeb0cb243f 100644 --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c @@ -70,10 +70,8 @@ brw_upload_cs_state(struct brw_context *brw) unsigned local_id_dwords = 0; - if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) { - local_id_dwords = - brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size); - } + if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) + local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; unsigned push_constant_data_size = (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value); @@ -190,63 +188,6 @@ const struct brw_tracked_state brw_cs_state = { }; -/** - * We are building the local ID push constant data using the simplest possible - * method. We simply push the local IDs directly as they should appear in the - * registers for the uvec3 gl_LocalInvocationID variable. - * - * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6 - * registers worth of push constant space. - * - * Note: Any updates to brw_cs_prog_local_id_payload_dwords, - * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need - * to coordinated. - * - * FINISHME: There are a few easy optimizations to consider. - * - * 1. 
If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is - * no need for using push constant space for that dimension. - * - * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can - * easily use 16-bit words rather than 32-bit dwords in the push constant - * data. - * - * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for - * conveying the data, and thereby reduce push constant usage. - * - */ -unsigned -brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width) -{ - return 3 * dispatch_width; -} - - -static void -fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data, - void *buffer, unsigned *x, unsigned *y, unsigned *z) -{ - uint32_t *param = (uint32_t *)buffer; - for (unsigned i = 0; i < cs_prog_data->simd_size; i++) { - param[0 * cs_prog_data->simd_size + i] = *x; - param[1 * cs_prog_data->simd_size + i] = *y; - param[2 * cs_prog_data->simd_size + i] = *z; - - (*x)++; - if (*x == cs_prog_data->local_size[0]) { - *x = 0; - (*y)++; - if (*y == cs_prog_data->local_size[1]) { - *y = 0; - (*z)++; - if (*z == cs_prog_data->local_size[2]) - *z = 0; - } - } - } -} - - /** * Creates a region containing the push constants for the CS on gen7+. * @@ -269,10 +210,8 @@ brw_upload_cs_push_constants(struct brw_context *brw, (struct brw_stage_prog_data*) cs_prog_data; unsigned local_id_dwords = 0; - if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) { - local_id_dwords = - brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size); - } + if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) + local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; /* Updates the ParamaterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. 
@@ -302,14 +241,13 @@ brw_upload_cs_push_constants(struct brw_context *brw, STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); + brw_cs_fill_local_id_payload(cs_prog_data, param, threads, + reg_aligned_constant_size); + /* _NEW_PROGRAM_CONSTANTS */ - unsigned x = 0, y = 0, z = 0; for (t = 0; t < threads; t++) { - gl_constant_value *next_param = &param[t * param_aligned_count]; - if (local_id_dwords > 0) { - fill_local_id_payload(cs_prog_data, (void*)next_param, &x, &y, &z); - next_param += local_id_dwords; - } + gl_constant_value *next_param = + &param[t * param_aligned_count + local_id_dwords]; for (i = 0; i < prog_data->nr_params; i++) { next_param[i] = *prog_data->param[i]; } -- cgit v1.2.3 From cc4683992bf34fb09871e976abb8eebe7d68c3fe Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 6 Oct 2015 16:11:08 -0700 Subject: i965: Move brw_mark_surface_used() to brw_shader.cpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit brw_program.c won't be part of the compiler library, but we need brw_mark_surface_used() in the compiler. Move to brw_shader.cpp. 
Reviewed-by: Topi Pohjolainen Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/brw_program.c | 10 ---------- src/mesa/drivers/dri/i965/brw_shader.cpp | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index a034dacd87b..108eb3421a0 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -530,16 +530,6 @@ brw_destroy_shader_time(struct brw_context *brw) brw->shader_time.bo = NULL; } -void -brw_mark_surface_used(struct brw_stage_prog_data *prog_data, - unsigned surf_index) -{ - assert(surf_index < BRW_MAX_SURFACES); - - prog_data->binding_table.size_bytes = - MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4); -} - void brw_stage_prog_data_free(const void *p) { diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 9ad7f3c3e9a..15f73936583 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -164,6 +164,16 @@ brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) return &shader->base; } +void +brw_mark_surface_used(struct brw_stage_prog_data *prog_data, + unsigned surf_index) +{ + assert(surf_index < BRW_MAX_SURFACES); + + prog_data->binding_table.size_bytes = + MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4); +} + enum brw_reg_type brw_type_for_base_type(const struct glsl_type *type) { -- cgit v1.2.3 From 365e5d78924ffe7d4ce47e3fbadc35c7105e5d34 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 5 Oct 2015 14:02:56 -0700 Subject: i965: Use util_next_power_of_two() for brw_get_scratch_size() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function computes the next power of two, but at least 1024. We can do that by bitwise or'ing in 1023 and calling util_next_power_of_two(). 
We use brw_get_scratch_size() from the compiler so we need it out of brw_program.c. We could move it to brw_shader.cpp, but let's make it a small inline function instead. Reviewed-by: Topi Pohjolainen Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/brw_context.h | 7 ++++++- src/mesa/drivers/dri/i965/brw_program.c | 12 ------------ 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1869f284331..aa1284db3ce 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1723,7 +1723,12 @@ void brw_validate_textures( struct brw_context *brw ); */ void brwInitFragProgFuncs( struct dd_function_table *functions ); -int brw_get_scratch_size(int size); +/* Per-thread scratch space is a power-of-two multiple of 1KB. */ +static inline int +brw_get_scratch_size(int size) +{ + return util_next_power_of_two(size | 1023); +} void brw_get_scratch_bo(struct brw_context *brw, drm_intel_bo **scratch_bo, int size); void brw_init_shader_time(struct brw_context *brw); diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 108eb3421a0..69ecc36f2e7 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -242,18 +242,6 @@ brw_add_texrect_params(struct gl_program *prog) } } -/* Per-thread scratch space is a power-of-two multiple of 1KB. 
*/ -int -brw_get_scratch_size(int size) -{ - int i; - - for (i = 1024; i < size; i *= 2) - ; - - return i; -} - void brw_get_scratch_bo(struct brw_context *brw, drm_intel_bo **scratch_bo, int size) -- cgit v1.2.3 From ffc841cae5a4a46c462b5ad5d97017154d3327e2 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 7 Oct 2015 04:19:39 -0700 Subject: i965: Move brw_select_clip_planes() to brw_shader.cpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We call this from the compiler so move it to brw_shader.cpp. Reviewed-by: Topi Pohjolainen Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/brw_shader.cpp | 26 ++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_vs.c | 25 ------------------------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 15f73936583..0f743fb43c1 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -1227,3 +1227,29 @@ brw_setup_image_uniform_values(gl_shader_stage stage, stage_prog_data->binding_table.image_start + image_idx); } } + +/** + * Decide which set of clip planes should be used when clipping via + * gl_Position or gl_ClipVertex. + */ +gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx) +{ + if (ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]) { + /* There is currently a GLSL vertex shader, so clip according to GLSL + * rules, which means compare gl_ClipVertex (or gl_Position, if + * gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes + * that were stored in EyeUserPlane at the time the clip planes were + * specified. + */ + return ctx->Transform.EyeUserPlane; + } else { + /* Either we are using fixed function or an ARB vertex program. 
In + * either case the clip planes are going to be compared against + * gl_Position (which is in clip coordinates) so we have to clip using + * _ClipUserPlane, which was transformed into clip coordinates by Mesa + * core. + */ + return ctx->Transform._ClipUserPlane; + } +} + diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 63d2e3f0f03..36ba52e858b 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -41,31 +41,6 @@ #include "util/ralloc.h" -/** - * Decide which set of clip planes should be used when clipping via - * gl_Position or gl_ClipVertex. - */ -gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx) -{ - if (ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]) { - /* There is currently a GLSL vertex shader, so clip according to GLSL - * rules, which means compare gl_ClipVertex (or gl_Position, if - * gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes - * that were stored in EyeUserPlane at the time the clip planes were - * specified. - */ - return ctx->Transform.EyeUserPlane; - } else { - /* Either we are using fixed function or an ARB vertex program. In - * either case the clip planes are going to be compared against - * gl_Position (which is in clip coordinates) so we have to clip using - * _ClipUserPlane, which was transformed into clip coordinates by Mesa - * core. - */ - return ctx->Transform._ClipUserPlane; - } -} - bool brw_codegen_vs_prog(struct brw_context *brw, struct gl_shader_program *prog, -- cgit v1.2.3 From ee0f0108c8e87b9cfec25bade66670bbc4254139 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 7 Oct 2015 05:06:30 -0700 Subject: i965: Move brw_get_shader_time_index() call out of emit functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit brw_get_shader_time_index() is all tangled up in brw_context state and we can't call it from the compiler. 
Thanks to Jason's recent refactoring, we can just get the index and pass it to the emit functions instead. Reviewed-by: Jason Ekstrand Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/brw_cs.c | 6 +++++- src/mesa/drivers/dri/i965/brw_cs.h | 1 + src/mesa/drivers/dri/i965/brw_fs.cpp | 20 ++++++-------------- src/mesa/drivers/dri/i965/brw_gs.c | 6 +++++- src/mesa/drivers/dri/i965/brw_vec4.cpp | 9 +++------ src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 11 ++++------- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h | 1 + src/mesa/drivers/dri/i965/brw_vs.c | 6 +++++- src/mesa/drivers/dri/i965/brw_vs.h | 1 + src/mesa/drivers/dri/i965/brw_wm.c | 8 +++++++- src/mesa/drivers/dri/i965/brw_wm.h | 2 ++ 11 files changed, 40 insertions(+), 31 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index 34680ee6e3a..45fb816c160 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -101,8 +101,12 @@ brw_codegen_cs_prog(struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_CS)) brw_dump_ir("compute", prog, &cs->base, &cp->program.Base); + int st_index = -1; + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + st_index = brw_get_shader_time_index(brw, prog, &cp->program.Base, ST_CS); + program = brw_cs_emit(brw, mem_ctx, key, &prog_data, - &cp->program, prog, &program_size); + &cp->program, prog, st_index, &program_size); if (program == NULL) { ralloc_free(mem_ctx); return false; diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h index c07eb6ca6ee..17c2ff9871a 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.h +++ b/src/mesa/drivers/dri/i965/brw_cs.h @@ -46,6 +46,7 @@ brw_cs_emit(struct brw_context *brw, struct brw_cs_prog_data *prog_data, struct gl_compute_program *cp, struct gl_shader_program *prog, + int shader_time_index, unsigned *final_assembly_size); void diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp 
index 6ce15703340..f1282a0948f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5121,18 +5121,13 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_prog_data *prog_data, struct gl_fragment_program *fp, struct gl_shader_program *prog, + int shader_time_index8, int shader_time_index16, unsigned *final_assembly_size) { - int st_index8 = -1, st_index16 = -1; - if (INTEL_DEBUG & DEBUG_SHADER_TIME) { - st_index8 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS8); - st_index16 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS16); - } - /* Now the main event: Visit the shader IR and generate our FS IR for it. */ fs_visitor v(brw->intelScreen->compiler, brw, mem_ctx, key, - &prog_data->base, &fp->Base, fp->Base.nir, 8, st_index8); + &prog_data->base, &fp->Base, fp->Base.nir, 8, shader_time_index8); if (!v.run_fs(false /* do_rep_send */)) { if (prog) { prog->LinkStatus = false; @@ -5147,7 +5142,7 @@ brw_wm_fs_emit(struct brw_context *brw, cfg_t *simd16_cfg = NULL; fs_visitor v2(brw->intelScreen->compiler, brw, mem_ctx, key, - &prog_data->base, &fp->Base, fp->Base.nir, 16, st_index16); + &prog_data->base, &fp->Base, fp->Base.nir, 16, shader_time_index16); if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) { if (!v.simd16_unsupported) { /* Try a SIMD16 compile */ @@ -5274,6 +5269,7 @@ brw_cs_emit(struct brw_context *brw, struct brw_cs_prog_data *prog_data, struct gl_compute_program *cp, struct gl_shader_program *prog, + int shader_time_index, unsigned *final_assembly_size) { prog_data->local_size[0] = cp->LocalSize[0]; @@ -5285,14 +5281,10 @@ brw_cs_emit(struct brw_context *brw, cfg_t *cfg = NULL; const char *fail_msg = NULL; - int st_index = -1; - if (INTEL_DEBUG & DEBUG_SHADER_TIME) - st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS); - /* Now the main event: Visit the shader IR and generate our CS IR for it. 
*/ fs_visitor v8(brw->intelScreen->compiler, brw, mem_ctx, key, - &prog_data->base, &cp->Base, cp->Base.nir, 8, st_index); + &prog_data->base, &cp->Base, cp->Base.nir, 8, shader_time_index); if (!v8.run_cs()) { fail_msg = v8.fail_msg; } else if (local_workgroup_size <= 8 * brw->max_cs_threads) { @@ -5301,7 +5293,7 @@ brw_cs_emit(struct brw_context *brw, } fs_visitor v16(brw->intelScreen->compiler, brw, mem_ctx, key, - &prog_data->base, &cp->Base, cp->Base.nir, 16, st_index); + &prog_data->base, &cp->Base, cp->Base.nir, 16, shader_time_index); if (likely(!(INTEL_DEBUG & DEBUG_NO16)) && !fail_msg && !v8.simd16_unsupported && local_workgroup_size <= 16 * brw->max_cs_threads) { diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 26c91e4c76a..e0165fb4a23 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -294,10 +294,14 @@ brw_codegen_gs_prog(struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_GS)) brw_dump_ir("geometry", prog, gs, NULL); + int st_index = -1; + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + st_index = brw_get_shader_time_index(brw, prog, NULL, ST_GS); + void *mem_ctx = ralloc_context(NULL); unsigned program_size; const unsigned *program = - brw_gs_emit(brw, prog, &c, mem_ctx, &program_size); + brw_gs_emit(brw, prog, &c, mem_ctx, st_index, &program_size); if (program == NULL) { ralloc_free(mem_ctx); return false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 4b4a216d5f5..e966b96a5ca 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1943,21 +1943,18 @@ brw_vs_emit(struct brw_context *brw, struct brw_vs_prog_data *prog_data, struct gl_vertex_program *vp, struct gl_shader_program *prog, + int shader_time_index, unsigned *final_assembly_size) { const unsigned *assembly = NULL; - int st_index = -1; - if (INTEL_DEBUG & DEBUG_SHADER_TIME) - st_index = brw_get_shader_time_index(brw, 
prog, &vp->Base, ST_VS); - if (brw->intelScreen->compiler->scalar_vs) { prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; fs_visitor v(brw->intelScreen->compiler, brw, mem_ctx, key, &prog_data->base.base, NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */ - vp->Base.nir, 8, st_index); + vp->Base.nir, 8, shader_time_index); if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) { if (prog) { prog->LinkStatus = false; @@ -1995,7 +1992,7 @@ brw_vs_emit(struct brw_context *brw, vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data, vp->Base.nir, brw_select_clip_planes(&brw->ctx), - mem_ctx, st_index, + mem_ctx, shader_time_index, !_mesa_is_gles3(&brw->ctx)); if (!v.run()) { if (prog) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 36ab25d1259..4ce471e0669 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -618,14 +618,11 @@ brw_gs_emit(struct brw_context *brw, struct gl_shader_program *prog, struct brw_gs_compile *c, void *mem_ctx, + int shader_time_index, unsigned *final_assembly_size) { struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; - int st_index = -1; - if (INTEL_DEBUG & DEBUG_SHADER_TIME) - st_index = brw_get_shader_time_index(brw, prog, NULL, ST_GS); - if (brw->gen >= 7) { /* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do * so without spilling. 
If the GS invocations count > 1, then we can't use @@ -637,7 +634,7 @@ brw_gs_emit(struct brw_context *brw, vec4_gs_visitor v(brw->intelScreen->compiler, brw, c, shader->Program->nir, - mem_ctx, true /* no_spills */, st_index); + mem_ctx, true /* no_spills */, shader_time_index); if (v.run()) { return generate_assembly(brw, prog, &c->gp->program.Base, &c->prog_data.base, mem_ctx, v.cfg, @@ -681,12 +678,12 @@ brw_gs_emit(struct brw_context *brw, gs = new vec4_gs_visitor(brw->intelScreen->compiler, brw, c, shader->Program->nir, mem_ctx, false /* no_spills */, - st_index); + shader_time_index); else gs = new gen6_gs_visitor(brw->intelScreen->compiler, brw, c, prog, shader->Program->nir, mem_ctx, false /* no_spills */, - st_index); + shader_time_index); if (!gs->run()) { prog->LinkStatus = false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h index da93f0dc1c6..3ff195c3e68 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h @@ -55,6 +55,7 @@ const unsigned *brw_gs_emit(struct brw_context *brw, struct gl_shader_program *prog, struct brw_gs_compile *c, void *mem_ctx, + int shader_time_index, unsigned *final_assembly_size); #ifdef __cplusplus diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 36ba52e858b..38de98fab86 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -173,10 +173,14 @@ brw_codegen_vs_prog(struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_VS)) brw_dump_ir("vertex", prog, &vs->base, &vp->program.Base); + int st_index = -1; + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + st_index = brw_get_shader_time_index(brw, prog, &vp->program.Base, ST_VS); + /* Emit GEN4 code. 
*/ program = brw_vs_emit(brw, mem_ctx, key, &prog_data, - &vp->program, prog, &program_size); + &vp->program, prog, st_index, &program_size); if (program == NULL) { ralloc_free(mem_ctx); return false; diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 19551c9d550..f1242f61b33 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -60,6 +60,7 @@ const unsigned *brw_vs_emit(struct brw_context *brw, struct brw_vs_prog_data *prog_data, struct gl_vertex_program *vp, struct gl_shader_program *shader_prog, + int shader_time_index, unsigned *program_size); void brw_vs_debug_recompile(struct brw_context *brw, struct gl_shader_program *prog, diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 81a1d90bb60..4d5e7f67bd6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -224,8 +224,14 @@ brw_codegen_wm_prog(struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_WM)) brw_dump_ir("fragment", prog, &fs->base, &fp->program.Base); + int st_index8 = -1, st_index16 = -1; + if (INTEL_DEBUG & DEBUG_SHADER_TIME) { + st_index8 = brw_get_shader_time_index(brw, prog, &fp->program.Base, ST_FS8); + st_index16 = brw_get_shader_time_index(brw, prog, &fp->program.Base, ST_FS16); + } + program = brw_wm_fs_emit(brw, mem_ctx, key, &prog_data, - &fp->program, prog, &program_size); + &fp->program, prog, st_index8, st_index16, &program_size); if (program == NULL) { ralloc_free(mem_ctx); return false; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 77b83b0a3f8..6ee22b2f907 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -72,6 +72,8 @@ const unsigned *brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_prog_data *prog_data, struct gl_fragment_program *fp, struct gl_shader_program *prog, + int shader_time_index8, + int shader_time_index16, unsigned 
*final_assembly_size); GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); -- cgit v1.2.3 From 9a2573e5fc63f48cde56efdb191c129e7d7fb7b1 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 7 Oct 2015 05:13:50 -0700 Subject: i965/cs: Get max_cs_threads from brw_compiler devinfo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Jason Ekstrand Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/brw_fs.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index f1282a0948f..ba62fdd4b86 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5277,6 +5277,7 @@ brw_cs_emit(struct brw_context *brw, prog_data->local_size[2] = cp->LocalSize[2]; unsigned local_workgroup_size = cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2]; + unsigned max_cs_threads = brw->intelScreen->compiler->devinfo->max_cs_threads; cfg_t *cfg = NULL; const char *fail_msg = NULL; @@ -5287,7 +5288,7 @@ brw_cs_emit(struct brw_context *brw, &prog_data->base, &cp->Base, cp->Base.nir, 8, shader_time_index); if (!v8.run_cs()) { fail_msg = v8.fail_msg; - } else if (local_workgroup_size <= 8 * brw->max_cs_threads) { + } else if (local_workgroup_size <= 8 * max_cs_threads) { cfg = v8.cfg; prog_data->simd_size = 8; } @@ -5296,7 +5297,7 @@ brw_cs_emit(struct brw_context *brw, &prog_data->base, &cp->Base, cp->Base.nir, 16, shader_time_index); if (likely(!(INTEL_DEBUG & DEBUG_NO16)) && !fail_msg && !v8.simd16_unsupported && - local_workgroup_size <= 16 * brw->max_cs_threads) { + local_workgroup_size <= 16 * max_cs_threads) { /* Try a SIMD16 compile */ v16.import_uniforms(&v8); if (!v16.run_cs()) { -- cgit v1.2.3 From 08d890d3bb82eb1f5c71cf2eab1521c20c4d154c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 6 Oct 2015 16:54:52 -0700 
Subject: i965: Break out backend compiler to its own library MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This introduces a new libtool helper library, libi965_compiler.la. This library is moderately self-contained, but still needs to link to all of libmesa.la among other things. Reviewed-by: Jason Ekstrand Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/Makefile.am | 6 +- src/mesa/drivers/dri/i965/Makefile.sources | 152 +++++++++++++++-------------- 2 files changed, 81 insertions(+), 77 deletions(-) diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 566f2ddd98f..82e58a6baa8 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -41,9 +41,11 @@ AM_CFLAGS = \ AM_CXXFLAGS = $(AM_CFLAGS) -noinst_LTLIBRARIES = libi965_dri.la +noinst_LTLIBRARIES = libi965_dri.la libi965_compiler.la libi965_dri_la_SOURCES = $(i965_FILES) -libi965_dri_la_LIBADD = $(INTEL_LIBS) +libi965_dri_la_LIBADD = libi965_compiler.la $(INTEL_LIBS) + +libi965_compiler_la_SOURCES = $(i965_compiler_FILES) TEST_LIBS = \ libi965_dri.la \ diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 7e1586ffbbd..81ef6283fa1 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -1,47 +1,18 @@ -i965_FILES = \ - brw_binding_tables.c \ - brw_blorp_blit.cpp \ - brw_blorp_blit_eu.cpp \ - brw_blorp_blit_eu.h \ - brw_blorp.cpp \ - brw_blorp.h \ - brw_cc.c \ +i965_compiler_FILES = \ brw_cfg.cpp \ brw_cfg.h \ - brw_clear.c \ - brw_clip.c \ - brw_clip.h \ - brw_clip_line.c \ - brw_clip_point.c \ - brw_clip_state.c \ - brw_clip_tri.c \ - brw_clip_unfilled.c \ - brw_clip_util.c \ - brw_compute.c \ - brw_conditional_render.c \ - brw_context.c \ - brw_context.h \ - brw_cs.c \ - brw_cs.h \ brw_cubemap_normalize.cpp \ - brw_curbe.c \ brw_dead_control_flow.cpp \ 
brw_dead_control_flow.h \ brw_defines.h \ brw_device_info.c \ brw_device_info.h \ brw_disasm.c \ - brw_draw.c \ - brw_draw.h \ - brw_draw_upload.c \ brw_eu.c \ brw_eu_compact.c \ brw_eu_emit.c \ brw_eu.h \ brw_eu_util.c \ - brw_ff_gs.c \ - brw_ff_gs_emit.c \ - brw_ff_gs.h \ brw_fs_builder.h \ brw_fs_channel_expressions.cpp \ brw_fs_cmod_propagation.cpp \ @@ -64,58 +35,24 @@ i965_FILES = \ brw_fs_validate.cpp \ brw_fs_vector_splitting.cpp \ brw_fs_visitor.cpp \ - brw_gs.c \ - brw_gs.h \ - brw_gs_state.c \ - brw_gs_surface_state.c \ brw_inst.h \ brw_interpolation_map.c \ brw_ir_allocator.h \ brw_ir_fs.h \ brw_ir_vec4.h \ - brw_link.cpp \ brw_lower_texture_gradients.cpp \ brw_lower_unnormalized_offset.cpp \ - brw_meta_fast_clear.c \ - brw_meta_stencil_blit.c \ - brw_meta_updownsample.c \ - brw_meta_util.c \ - brw_meta_util.h \ - brw_misc_state.c \ - brw_multisample_state.h \ brw_nir.h \ brw_nir.c \ brw_nir_analyze_boolean_resolves.c \ brw_nir_uniforms.cpp \ - brw_object_purgeable.c \ brw_packed_float.c \ - brw_performance_monitor.c \ - brw_pipe_control.c \ brw_predicated_break.cpp \ - brw_primitive_restart.c \ - brw_program.c \ - brw_program.h \ - brw_queryobj.c \ brw_reg.h \ - brw_reset.c \ - brw_sampler_state.c \ brw_schedule_instructions.cpp \ - brw_sf.c \ - brw_sf_emit.c \ - brw_sf.h \ - brw_sf_state.c \ brw_shader.cpp \ brw_shader.h \ - brw_state_batch.c \ - brw_state_cache.c \ - brw_state_dump.c \ - brw_state.h \ - brw_state_upload.c \ - brw_structs.h \ brw_surface_formats.c \ - brw_tex.c \ - brw_tex_layout.c \ - brw_urb.c \ brw_util.c \ brw_util.h \ brw_vec4_builder.h \ @@ -136,14 +73,88 @@ i965_FILES = \ brw_vec4_surface_builder.h \ brw_vec4_visitor.cpp \ brw_vec4_vs_visitor.cpp \ + brw_vue_map.c \ + brw_wm_iz.cpp \ + gen6_gs_visitor.cpp \ + gen6_gs_visitor.h \ + intel_asm_annotation.c \ + intel_asm_annotation.h \ + intel_debug.c \ + intel_debug.h \ + intel_reg.h \ + intel_resolve_map.c \ + intel_resolve_map.h + +i965_FILES = \ + brw_binding_tables.c \ + 
brw_blorp_blit.cpp \ + brw_blorp_blit_eu.cpp \ + brw_blorp_blit_eu.h \ + brw_blorp.cpp \ + brw_blorp.h \ + brw_cc.c \ + brw_clear.c \ + brw_clip.c \ + brw_clip.h \ + brw_clip_line.c \ + brw_clip_point.c \ + brw_clip_state.c \ + brw_clip_tri.c \ + brw_clip_unfilled.c \ + brw_clip_util.c \ + brw_compute.c \ + brw_conditional_render.c \ + brw_context.c \ + brw_context.h \ + brw_cs.c \ + brw_cs.h \ + brw_curbe.c \ + brw_draw.c \ + brw_draw.h \ + brw_draw_upload.c \ + brw_ff_gs.c \ + brw_ff_gs_emit.c \ + brw_ff_gs.h \ + brw_gs.c \ + brw_gs.h \ + brw_gs_state.c \ + brw_gs_surface_state.c \ + brw_link.cpp \ + brw_meta_fast_clear.c \ + brw_meta_stencil_blit.c \ + brw_meta_updownsample.c \ + brw_meta_util.c \ + brw_meta_util.h \ + brw_misc_state.c \ + brw_multisample_state.h \ + brw_object_purgeable.c \ + brw_performance_monitor.c \ + brw_pipe_control.c \ + brw_program.c \ + brw_program.h \ + brw_primitive_restart.c \ + brw_queryobj.c \ + brw_reset.c \ + brw_sampler_state.c \ + brw_sf.c \ + brw_sf_emit.c \ + brw_sf.h \ + brw_sf_state.c \ + brw_state_batch.c \ + brw_state_cache.c \ + brw_state_dump.c \ + brw_state.h \ + brw_state_upload.c \ + brw_structs.h \ + brw_tex.c \ + brw_tex_layout.c \ + brw_urb.c \ brw_vs.c \ brw_vs.h \ brw_vs_state.c \ brw_vs_surface_state.c \ - brw_vue_map.c \ brw_wm.c \ brw_wm.h \ - brw_wm_iz.cpp \ brw_wm_state.c \ brw_wm_surface_state.c \ gen6_blorp.cpp \ @@ -153,8 +164,6 @@ i965_FILES = \ gen6_depth_state.c \ gen6_depthstencil.c \ gen6_gs_state.c \ - gen6_gs_visitor.cpp \ - gen6_gs_visitor.h \ gen6_multisample_state.c \ gen6_queryobj.c \ gen6_sampler_state.c \ @@ -193,8 +202,6 @@ i965_FILES = \ gen8_viewport_state.c \ gen8_vs_state.c \ gen8_wm_depth_stencil.c \ - intel_asm_annotation.c \ - intel_asm_annotation.h \ intel_batchbuffer.c \ intel_batchbuffer.h \ intel_blit.c \ @@ -204,8 +211,6 @@ i965_FILES = \ intel_buffers.c \ intel_buffers.h \ intel_copy_image.c \ - intel_debug.c \ - intel_debug.h \ intel_extensions.c \ intel_fbo.c \ intel_fbo.h \ 
@@ -218,9 +223,6 @@ i965_FILES = \ intel_pixel_draw.c \ intel_pixel.h \ intel_pixel_read.c \ - intel_reg.h \ - intel_resolve_map.c \ - intel_resolve_map.h \ intel_screen.c \ intel_screen.h \ intel_state.c \ -- cgit v1.2.3 From c71f0d45e6d0081ea814fb0b16baec4e75a07bcb Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Wed, 7 Oct 2015 05:09:48 -0700 Subject: i965: Link compiler unit tests to libi965_compiler.la MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can now link the unit tests against just libi965_compiler.la. This lets us drop a lot of DRI driver dependencies, but we still pull in all of libmesa and more. This also provides a few standalone users of libi965_compiler.la, which will help us avoid accidentally using i965_dri.so functions from the compiler. Reviewed-by: Jason Ekstrand Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/Makefile.am | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 82e58a6baa8..2e241511049 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -48,13 +48,9 @@ libi965_dri_la_LIBADD = libi965_compiler.la $(INTEL_LIBS) libi965_compiler_la_SOURCES = $(i965_compiler_FILES) TEST_LIBS = \ - libi965_dri.la \ - ../common/libdricommon.la \ - ../common/libxmlconfig.la \ - ../common/libmegadriver_stub.la \ + libi965_compiler.la \ ../../../libmesa.la \ - $(DRI_LIB_DEPS) \ - $(CLOCK_LIB) \ + -lpthread -ldl \ ../common/libdri_test_stubs.la TESTS = \ -- cgit v1.2.3