diff options
Diffstat (limited to 'src/gallium')
175 files changed, 5898 insertions, 2187 deletions
diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk index 39e064e9538..b406d4a5480 100644 --- a/src/gallium/Android.mk +++ b/src/gallium/Android.mk @@ -27,6 +27,7 @@ GALLIUM_TOP := $(call my-dir) GALLIUM_COMMON_MK := $(GALLIUM_TOP)/Android.common.mk SUBDIRS := auxiliary +SUBDIRS += auxiliary/pipe-loader # # Gallium drivers and their respective winsys diff --git a/src/gallium/Automake.inc b/src/gallium/Automake.inc index ee07ab6c8f9..6fe2e22fecf 100644 --- a/src/gallium/Automake.inc +++ b/src/gallium/Automake.inc @@ -67,3 +67,9 @@ if HAVE_DRISW GALLIUM_PIPE_LOADER_WINSYS_LIBS += \ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la endif + +if HAVE_DRISW_KMS +GALLIUM_PIPE_LOADER_WINSYS_LIBS += \ + $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \ + $(LIBDRM_LIBS) +endif diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am index 611d55fafe2..e42a8f17703 100644 --- a/src/gallium/Makefile.am +++ b/src/gallium/Makefile.am @@ -5,6 +5,7 @@ SUBDIRS = ## SUBDIRS += auxiliary +SUBDIRS += auxiliary/pipe-loader ## ## Gallium pipe drivers and their respective winsys' @@ -98,7 +99,7 @@ if HAVE_DRISW SUBDIRS += winsys/sw/dri endif -if HAVE_DRI2 +if HAVE_DRISW_KMS SUBDIRS += winsys/sw/kms-dri endif @@ -120,7 +121,8 @@ EXTRA_DIST = \ ## Gallium state trackers and their users (targets) ## -if HAVE_LOADER_GALLIUM +## XXX: Rename the conditional once we have a config switch for static/dynamic pipe-drivers +if HAVE_CLOVER SUBDIRS += targets/pipe-loader endif diff --git a/src/gallium/SConscript b/src/gallium/SConscript index fa5fa6e8734..0c3a3742c16 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -5,6 +5,7 @@ Import('env') # SConscript('auxiliary/SConscript') +SConscript('auxiliary/pipe-loader/SConscript') # # Drivers diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am index a728162bd9d..ee296ceda33 100644 --- a/src/gallium/auxiliary/Makefile.am +++ b/src/gallium/auxiliary/Makefile.am @@ -1,7 +1,3 @@ 
-if HAVE_LOADER_GALLIUM -SUBDIRS := pipe-loader -endif - include Makefile.sources include $(top_srcdir)/src/gallium/Automake.inc @@ -66,15 +62,7 @@ COMMON_VL_CFLAGS = \ $(AM_CFLAGS) \ $(VL_CFLAGS) \ $(DRI2PROTO_CFLAGS) \ - $(LIBDRM_CFLAGS) \ - $(GALLIUM_PIPE_LOADER_DEFINES) \ - -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" - -if HAVE_GALLIUM_STATIC_TARGETS -COMMON_VL_CFLAGS += \ - -DGALLIUM_STATIC_TARGETS=1 - -endif # HAVE_GALLIUM_STATIC_TARGETS + $(LIBDRM_CFLAGS) noinst_LTLIBRARIES += libgalliumvl.la diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 82ef5ecfce4..61601920a94 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -219,8 +219,6 @@ C_SOURCES := \ util/u_format.h \ util/u_format_etc.c \ util/u_format_etc.h \ - util/u_format_fake.c \ - util/u_format_fake.h \ util/u_format_latc.c \ util/u_format_latc.h \ util/u_format_other.c \ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 7bda1184ee9..3ee708f4fad 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -536,6 +536,15 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, #if defined(PIPE_ARCH_PPC) MAttrs.push_back(util_cpu_caps.has_altivec ? 
"+altivec" : "-altivec"); +#if HAVE_LLVM >= 0x0304 + /* + * Make sure VSX instructions are disabled + * See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7 + */ + if (util_cpu_caps.has_altivec) { + MAttrs.push_back("-vsx"); + } +#endif #endif builder.setMAttrs(MAttrs); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 7d2cd9a9e73..28c7a86316e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -2608,7 +2608,12 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld, params.type = bld->bld_base.base.type; params.sample_key = sample_key; params.texture_index = unit; - params.sampler_index = unit; + /* + * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS + * and trigger some assertions with d3d10 where the sampler view number + * can exceed this. + */ + params.sampler_index = 0; params.context_ptr = bld->context_ptr; params.thread_data_ptr = bld->thread_data_ptr; params.coords = coords; diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c index ffe30b8fa79..efceb85e38d 100644 --- a/src/gallium/auxiliary/hud/hud_context.c +++ b/src/gallium/auxiliary/hud/hud_context.c @@ -33,6 +33,7 @@ * Set GALLIUM_HUD=help for more info. 
*/ +#include <signal.h> #include <stdio.h> #include "hud/hud_context.h" @@ -51,12 +52,15 @@ #include "tgsi/tgsi_text.h" #include "tgsi/tgsi_dump.h" +/* Control the visibility of all HUD contexts */ +static boolean huds_visible = TRUE; struct hud_context { struct pipe_context *pipe; struct cso_context *cso; struct u_upload_mgr *uploader; + struct hud_batch_query_context *batch_query; struct list_head pane_list; /* states */ @@ -95,6 +99,13 @@ struct hud_context { } text, bg, whitelines; }; +#ifdef PIPE_OS_UNIX +static void +signal_visible_handler(int sig, siginfo_t *siginfo, void *context) +{ + huds_visible = !huds_visible; +} +#endif static void hud_draw_colored_prims(struct hud_context *hud, unsigned prim, @@ -441,6 +452,9 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex) struct hud_pane *pane; struct hud_graph *gr; + if (!huds_visible) + return; + hud->fb_width = tex->width0; hud->fb_height = tex->height0; hud->constants.two_div_fb_width = 2.0f / hud->fb_width; @@ -510,6 +524,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex) hud_alloc_vertices(hud, &hud->text, 4 * 512, 4 * sizeof(float)); /* prepare all graphs */ + hud_batch_query_update(hud->batch_query); + LIST_FOR_EACH_ENTRY(pane, &hud->pane_list, head) { LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) { gr->query_new_value(gr); @@ -903,17 +919,21 @@ hud_parse_env_var(struct hud_context *hud, const char *env) } else if (strcmp(name, "samples-passed") == 0 && has_occlusion_query(hud->pipe->screen)) { - hud_pipe_query_install(pane, hud->pipe, "samples-passed", + hud_pipe_query_install(&hud->batch_query, pane, hud->pipe, + "samples-passed", PIPE_QUERY_OCCLUSION_COUNTER, 0, 0, PIPE_DRIVER_QUERY_TYPE_UINT64, - PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE); + PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE, + 0); } else if (strcmp(name, "primitives-generated") == 0 && has_streamout(hud->pipe->screen)) { - hud_pipe_query_install(pane, hud->pipe, "primitives-generated", + 
hud_pipe_query_install(&hud->batch_query, pane, hud->pipe, + "primitives-generated", PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0, PIPE_DRIVER_QUERY_TYPE_UINT64, - PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE); + PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE, + 0); } else { boolean processed = FALSE; @@ -938,17 +958,19 @@ hud_parse_env_var(struct hud_context *hud, const char *env) if (strcmp(name, pipeline_statistics_names[i]) == 0) break; if (i < Elements(pipeline_statistics_names)) { - hud_pipe_query_install(pane, hud->pipe, name, + hud_pipe_query_install(&hud->batch_query, pane, hud->pipe, name, PIPE_QUERY_PIPELINE_STATISTICS, i, 0, PIPE_DRIVER_QUERY_TYPE_UINT64, - PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE); + PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE, + 0); processed = TRUE; } } /* driver queries */ if (!processed) { - if (!hud_driver_query_install(pane, hud->pipe, name)){ + if (!hud_driver_query_install(&hud->batch_query, pane, hud->pipe, + name)) { fprintf(stderr, "gallium_hud: unknown driver query '%s'\n", name); } } @@ -1125,6 +1147,12 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso) struct pipe_sampler_view view_templ; unsigned i; const char *env = debug_get_option("GALLIUM_HUD", NULL); + unsigned signo = debug_get_num_option("GALLIUM_HUD_TOGGLE_SIGNAL", 0); +#ifdef PIPE_OS_UNIX + static boolean sig_handled = FALSE; + struct sigaction action = {}; +#endif + huds_visible = debug_get_bool_option("GALLIUM_HUD_VISIBLE", TRUE); if (!env || !*env) return NULL; @@ -1267,6 +1295,22 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso) LIST_INITHEAD(&hud->pane_list); + /* setup sig handler once for all hud contexts */ +#ifdef PIPE_OS_UNIX + if (!sig_handled && signo != 0) { + action.sa_sigaction = &signal_visible_handler; + action.sa_flags = SA_SIGINFO; + + if (signo >= NSIG) + fprintf(stderr, "gallium_hud: invalid signal %u\n", signo); + else if (sigaction(signo, &action, NULL) < 0) + fprintf(stderr, "gallium_hud: unable to set handler for signal %u\n", signo); + 
fflush(stderr); + + sig_handled = TRUE; + } +#endif + hud_parse_env_var(hud, env); return hud; } @@ -1287,6 +1331,7 @@ hud_destroy(struct hud_context *hud) FREE(pane); } + hud_batch_query_cleanup(&hud->batch_query); pipe->delete_fs_state(pipe, hud->fs_color); pipe->delete_fs_state(pipe, hud->fs_text); pipe->delete_vs_state(pipe, hud->vs); diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c b/src/gallium/auxiliary/hud/hud_driver_query.c index f14305ea835..d7b1f11ed56 100644 --- a/src/gallium/auxiliary/hud/hud_driver_query.c +++ b/src/gallium/auxiliary/hud/hud_driver_query.c @@ -34,13 +34,164 @@ #include "hud/hud_private.h" #include "pipe/p_screen.h" #include "os/os_time.h" +#include "util/u_math.h" #include "util/u_memory.h" #include <stdio.h> +// Must be a power of two #define NUM_QUERIES 8 +struct hud_batch_query_context { + struct pipe_context *pipe; + unsigned num_query_types; + unsigned allocated_query_types; + unsigned *query_types; + + boolean failed; + struct pipe_query *query[NUM_QUERIES]; + union pipe_query_result *result[NUM_QUERIES]; + unsigned head, pending, results; +}; + +void +hud_batch_query_update(struct hud_batch_query_context *bq) +{ + struct pipe_context *pipe; + + if (!bq || bq->failed) + return; + + pipe = bq->pipe; + + if (bq->query[bq->head]) + pipe->end_query(pipe, bq->query[bq->head]); + + bq->results = 0; + + while (bq->pending) { + unsigned idx = (bq->head - bq->pending + 1) % NUM_QUERIES; + struct pipe_query *query = bq->query[idx]; + + if (!bq->result[idx]) + bq->result[idx] = MALLOC(sizeof(bq->result[idx]->batch[0]) * + bq->num_query_types); + if (!bq->result[idx]) { + fprintf(stderr, "gallium_hud: out of memory.\n"); + bq->failed = TRUE; + return; + } + + if (!pipe->get_query_result(pipe, query, FALSE, bq->result[idx])) + break; + + ++bq->results; + --bq->pending; + } + + bq->head = (bq->head + 1) % NUM_QUERIES; + + if (bq->pending == NUM_QUERIES) { + fprintf(stderr, + "gallium_hud: all queries busy after %i frames, dropping 
data.\n", + NUM_QUERIES); + + assert(bq->query[bq->head]); + + pipe->destroy_query(bq->pipe, bq->query[bq->head]); + bq->query[bq->head] = NULL; + } + + ++bq->pending; + + if (!bq->query[bq->head]) { + bq->query[bq->head] = pipe->create_batch_query(pipe, + bq->num_query_types, + bq->query_types); + + if (!bq->query[bq->head]) { + fprintf(stderr, + "gallium_hud: create_batch_query failed. You may have " + "selected too many or incompatible queries.\n"); + bq->failed = TRUE; + return; + } + } + + if (!pipe->begin_query(pipe, bq->query[bq->head])) { + fprintf(stderr, + "gallium_hud: could not begin batch query. You may have " + "selected too many or incompatible queries.\n"); + bq->failed = TRUE; + } +} + +static boolean +batch_query_add(struct hud_batch_query_context **pbq, + struct pipe_context *pipe, unsigned query_type, + unsigned *result_index) +{ + struct hud_batch_query_context *bq = *pbq; + unsigned i; + + if (!bq) { + bq = CALLOC_STRUCT(hud_batch_query_context); + if (!bq) + return false; + bq->pipe = pipe; + *pbq = bq; + } + + for (i = 0; i < bq->num_query_types; ++i) { + if (bq->query_types[i] == query_type) { + *result_index = i; + return true; + } + } + + if (bq->num_query_types == bq->allocated_query_types) { + unsigned new_alloc = MAX2(16, bq->allocated_query_types * 2); + unsigned *new_query_types + = REALLOC(bq->query_types, + bq->allocated_query_types * sizeof(unsigned), + new_alloc * sizeof(unsigned)); + if (!new_query_types) + return false; + bq->query_types = new_query_types; + bq->allocated_query_types = new_alloc; + } + + bq->query_types[bq->num_query_types] = query_type; + *result_index = bq->num_query_types++; + return true; +} + +void +hud_batch_query_cleanup(struct hud_batch_query_context **pbq) +{ + struct hud_batch_query_context *bq = *pbq; + unsigned idx; + + if (!bq) + return; + + *pbq = NULL; + + if (bq->query[bq->head] && !bq->failed) + bq->pipe->end_query(bq->pipe, bq->query[bq->head]); + + for (idx = 0; idx < NUM_QUERIES; ++idx) { + 
if (bq->query[idx]) + bq->pipe->destroy_query(bq->pipe, bq->query[idx]); + FREE(bq->result[idx]); + } + + FREE(bq->query_types); + FREE(bq); +} + struct query_info { struct pipe_context *pipe; + struct hud_batch_query_context *batch; unsigned query_type; unsigned result_index; /* unit depends on query_type */ enum pipe_driver_query_result_type result_type; @@ -48,7 +199,6 @@ struct query_info { /* Ring of queries. If a query is busy, we use another slot. */ struct pipe_query *query[NUM_QUERIES]; unsigned head, tail; - unsigned num_queries; uint64_t last_time; uint64_t results_cumulative; @@ -56,11 +206,26 @@ struct query_info { }; static void -query_new_value(struct hud_graph *gr) +query_new_value_batch(struct query_info *info) +{ + struct hud_batch_query_context *bq = info->batch; + unsigned result_index = info->result_index; + unsigned idx = (bq->head - bq->pending) % NUM_QUERIES; + unsigned results = bq->results; + + while (results) { + info->results_cumulative += bq->result[idx]->batch[result_index].u64; + ++info->num_results; + + --results; + idx = (idx - 1) % NUM_QUERIES; + } +} + +static void +query_new_value_normal(struct query_info *info) { - struct query_info *info = gr->query_data; struct pipe_context *pipe = info->pipe; - uint64_t now = os_time_get(); if (info->last_time) { if (info->query[info->head]) @@ -107,30 +272,9 @@ query_new_value(struct hud_graph *gr) break; } } - - if (info->num_results && info->last_time + gr->pane->period <= now) { - uint64_t value; - - switch (info->result_type) { - default: - case PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE: - value = info->results_cumulative / info->num_results; - break; - case PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE: - value = info->results_cumulative; - break; - } - - hud_graph_add_value(gr, value); - - info->last_time = now; - info->results_cumulative = 0; - info->num_results = 0; - } } else { /* initialize */ - info->last_time = now; info->query[info->head] = pipe->create_query(pipe, info->query_type, 0); 
} @@ -139,11 +283,49 @@ query_new_value(struct hud_graph *gr) } static void +query_new_value(struct hud_graph *gr) +{ + struct query_info *info = gr->query_data; + uint64_t now = os_time_get(); + + if (info->batch) { + query_new_value_batch(info); + } else { + query_new_value_normal(info); + } + + if (!info->last_time) { + info->last_time = now; + return; + } + + if (info->num_results && info->last_time + gr->pane->period <= now) { + uint64_t value; + + switch (info->result_type) { + default: + case PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE: + value = info->results_cumulative / info->num_results; + break; + case PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE: + value = info->results_cumulative; + break; + } + + hud_graph_add_value(gr, value); + + info->last_time = now; + info->results_cumulative = 0; + info->num_results = 0; + } +} + +static void free_query_info(void *ptr) { struct query_info *info = ptr; - if (info->last_time) { + if (!info->batch && info->last_time) { struct pipe_context *pipe = info->pipe; int i; @@ -159,11 +341,13 @@ free_query_info(void *ptr) } void -hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe, +hud_pipe_query_install(struct hud_batch_query_context **pbq, + struct hud_pane *pane, struct pipe_context *pipe, const char *name, unsigned query_type, unsigned result_index, uint64_t max_value, enum pipe_driver_query_type type, - enum pipe_driver_query_result_type result_type) + enum pipe_driver_query_result_type result_type, + unsigned flags) { struct hud_graph *gr; struct query_info *info; @@ -175,28 +359,40 @@ hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe, strncpy(gr->name, name, sizeof(gr->name)); gr->name[sizeof(gr->name) - 1] = '\0'; gr->query_data = CALLOC_STRUCT(query_info); - if (!gr->query_data) { - FREE(gr); - return; - } + if (!gr->query_data) + goto fail_gr; gr->query_new_value = query_new_value; gr->free_query_data = free_query_info; info = gr->query_data; info->pipe = pipe; - info->query_type 
= query_type; - info->result_index = result_index; info->result_type = result_type; + if (flags & PIPE_DRIVER_QUERY_FLAG_BATCH) { + if (!batch_query_add(pbq, pipe, query_type, &info->result_index)) + goto fail_info; + info->batch = *pbq; + } else { + info->query_type = query_type; + info->result_index = result_index; + } + hud_pane_add_graph(pane, gr); if (pane->max_value < max_value) hud_pane_set_max_value(pane, max_value); pane->type = type; + return; + +fail_info: + FREE(info); +fail_gr: + FREE(gr); } boolean -hud_driver_query_install(struct hud_pane *pane, struct pipe_context *pipe, +hud_driver_query_install(struct hud_batch_query_context **pbq, + struct hud_pane *pane, struct pipe_context *pipe, const char *name) { struct pipe_screen *screen = pipe->screen; @@ -220,8 +416,9 @@ hud_driver_query_install(struct hud_pane *pane, struct pipe_context *pipe, if (!found) return FALSE; - hud_pipe_query_install(pane, pipe, query.name, query.query_type, 0, - query.max_value.u64, query.type, query.result_type); + hud_pipe_query_install(pbq, pane, pipe, query.name, query.query_type, 0, + query.max_value.u64, query.type, query.result_type, + query.flags); return TRUE; } diff --git a/src/gallium/auxiliary/hud/hud_private.h b/src/gallium/auxiliary/hud/hud_private.h index 01caf7b8b2c..4a788bba456 100644 --- a/src/gallium/auxiliary/hud/hud_private.h +++ b/src/gallium/auxiliary/hud/hud_private.h @@ -80,19 +80,26 @@ void hud_pane_set_max_value(struct hud_pane *pane, uint64_t value); void hud_graph_add_value(struct hud_graph *gr, uint64_t value); /* graphs/queries */ +struct hud_batch_query_context; + #define ALL_CPUS ~0 /* optionally set as cpu_index */ int hud_get_num_cpus(void); void hud_fps_graph_install(struct hud_pane *pane); void hud_cpu_graph_install(struct hud_pane *pane, unsigned cpu_index); -void hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe, +void hud_pipe_query_install(struct hud_batch_query_context **pbq, + struct hud_pane *pane, struct 
pipe_context *pipe, const char *name, unsigned query_type, unsigned result_index, uint64_t max_value, enum pipe_driver_query_type type, - enum pipe_driver_query_result_type result_type); -boolean hud_driver_query_install(struct hud_pane *pane, + enum pipe_driver_query_result_type result_type, + unsigned flags); +boolean hud_driver_query_install(struct hud_batch_query_context **pbq, + struct hud_pane *pane, struct pipe_context *pipe, const char *name); +void hud_batch_query_update(struct hud_batch_query_context *bq); +void hud_batch_query_cleanup(struct hud_batch_query_context **pbq); #endif diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 0539cfc16a1..86c2ffadbc8 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -295,7 +295,7 @@ ttn_emit_declaration(struct ttn_compile *c) type = nir_type_int; break; case TGSI_RETURN_TYPE_UINT: - type = nir_type_unsigned; + type = nir_type_uint; break; case TGSI_RETURN_TYPE_FLOAT: default: @@ -1239,6 +1239,11 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) op = nir_texop_tex; num_srcs = 1; break; + case TGSI_OPCODE_TEX2: + op = nir_texop_tex; + num_srcs = 1; + samp = 2; + break; case TGSI_OPCODE_TXP: op = nir_texop_tex; num_srcs = 2; @@ -1275,6 +1280,10 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) num_srcs = 3; samp = 3; break; + case TGSI_OPCODE_LODQ: + op = nir_texop_lod; + num_srcs = 1; + break; default: fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode); @@ -1327,7 +1336,9 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) */ sview = instr->sampler_index; - if (sview < c->num_samp_types) { + if (op == nir_texop_lod) { + instr->dest_type = nir_type_float; + } else if (sview < c->num_samp_types) { instr->dest_type = c->samp_types[sview]; } else { instr->dest_type = nir_type_float; @@ -1394,10 +1405,12 @@ ttn_tex(struct ttn_compile *c, 
nir_alu_dest dest, nir_ssa_def **src) } if (instr->is_shadow) { - if (instr->coord_components < 3) - instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z)); - else + if (instr->coord_components == 4) + instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X)); + else if (instr->coord_components == 3) instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W)); + else + instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z)); instr->src[src_number].src_type = nir_tex_src_comparitor; src_number++; @@ -1641,7 +1654,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_UMUL_HI] = nir_op_umul_high, [TGSI_OPCODE_TG4] = 0, - [TGSI_OPCODE_LODQ] = 0, /* XXX */ + [TGSI_OPCODE_LODQ] = 0, [TGSI_OPCODE_IBFE] = nir_op_ibitfield_extract, [TGSI_OPCODE_UBFE] = nir_op_ubitfield_extract, @@ -1650,7 +1663,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_POPC] = nir_op_bit_count, [TGSI_OPCODE_LSB] = nir_op_find_lsb, [TGSI_OPCODE_IMSB] = nir_op_ifind_msb, - [TGSI_OPCODE_UMSB] = nir_op_ifind_msb, /* XXX: signed vs unsigned */ + [TGSI_OPCODE_UMSB] = nir_op_ufind_msb, [TGSI_OPCODE_INTERP_CENTROID] = 0, /* XXX */ [TGSI_OPCODE_INTERP_SAMPLE] = 0, /* XXX */ @@ -1803,11 +1816,13 @@ ttn_emit_instruction(struct ttn_compile *c) case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXD: + case TGSI_OPCODE_TEX2: case TGSI_OPCODE_TXL2: case TGSI_OPCODE_TXB2: case TGSI_OPCODE_TXQ_LZ: case TGSI_OPCODE_TXF: case TGSI_OPCODE_TG4: + case TGSI_OPCODE_LODQ: ttn_tex(c, dest, src); break; diff --git a/src/gallium/auxiliary/os/os_process.c b/src/gallium/auxiliary/os/os_process.c index a6262283d87..d2dcd0d7fbc 100644 --- a/src/gallium/auxiliary/os/os_process.c +++ b/src/gallium/auxiliary/os/os_process.c @@ -54,37 +54,48 @@ boolean os_get_process_name(char *procname, size_t size) { const char *name; + + /* First, check if the GALLIUM_PROCESS_NAME env var is set to + * override the normal process name query. 
+ */ + name = os_get_option("GALLIUM_PROCESS_NAME"); + + if (!name) { + /* do normal query */ + #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) - char szProcessPath[MAX_PATH]; - char *lpProcessName; - char *lpProcessExt; + char szProcessPath[MAX_PATH]; + char *lpProcessName; + char *lpProcessExt; - GetModuleFileNameA(NULL, szProcessPath, Elements(szProcessPath)); + GetModuleFileNameA(NULL, szProcessPath, Elements(szProcessPath)); - lpProcessName = strrchr(szProcessPath, '\\'); - lpProcessName = lpProcessName ? lpProcessName + 1 : szProcessPath; + lpProcessName = strrchr(szProcessPath, '\\'); + lpProcessName = lpProcessName ? lpProcessName + 1 : szProcessPath; - lpProcessExt = strrchr(lpProcessName, '.'); - if (lpProcessExt) { - *lpProcessExt = '\0'; - } + lpProcessExt = strrchr(lpProcessName, '.'); + if (lpProcessExt) { + *lpProcessExt = '\0'; + } - name = lpProcessName; + name = lpProcessName; #elif defined(__GLIBC__) || defined(__CYGWIN__) - name = program_invocation_short_name; + name = program_invocation_short_name; #elif defined(PIPE_OS_BSD) || defined(PIPE_OS_APPLE) - /* *BSD and OS X */ - name = getprogname(); + /* *BSD and OS X */ + name = getprogname(); #elif defined(PIPE_OS_HAIKU) - image_info info; - get_image_info(B_CURRENT_TEAM, &info); - name = info.name; + image_info info; + get_image_info(B_CURRENT_TEAM, &info); + name = info.name; #else #warning unexpected platform in os_process.c - return FALSE; + return FALSE; #endif + } + assert(size > 0); assert(procname); diff --git a/src/gallium/auxiliary/pipe-loader/Android.mk b/src/gallium/auxiliary/pipe-loader/Android.mk new file mode 100644 index 00000000000..27893137a1a --- /dev/null +++ b/src/gallium/auxiliary/pipe-loader/Android.mk @@ -0,0 +1,49 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2015 Emil Velikov <[email protected]> +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the 
Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# NOTE: Currently we build only a 'static' pipe-loader +LOCAL_PATH := $(call my-dir) + +# get COMMON_SOURCES and DRM_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_CFLAGS := \ + -DHAVE_PIPE_LOADER_DRI \ + -DDROP_PIPE_LOADER_MISC \ + -DGALLIUM_STATIC_TARGETS + +LOCAL_SRC_FILES := $(COMMON_SOURCES) + +LOCAL_MODULE := libmesa_pipe_loader + +ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),) +LOCAL_CFLAGS += -DHAVE_LIBDRM +LOCAL_SRC_FILES += $(DRM_SOURCES) + +LOCAL_SHARED_LIBRARIES := libdrm +LOCAL_STATIC_LIBRARIES := libmesa_loader +endif + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am b/src/gallium/auxiliary/pipe-loader/Makefile.am index 8c837996539..8039a957b1b 100644 --- a/src/gallium/auxiliary/pipe-loader/Makefile.am +++ b/src/gallium/auxiliary/pipe-loader/Makefile.am @@ -9,20 +9,40 @@ AM_CFLAGS = \ $(GALLIUM_CFLAGS) \ $(VISIBILITY_CFLAGS) -noinst_LTLIBRARIES = libpipe_loader.la +noinst_LTLIBRARIES = \ + libpipe_loader_static.la \ + libpipe_loader_dynamic.la 
-libpipe_loader_la_SOURCES = \ +libpipe_loader_static_la_CFLAGS = \ + $(AM_CFLAGS) \ + -DGALLIUM_STATIC_TARGETS=1 + +libpipe_loader_dynamic_la_CFLAGS = \ + $(AM_CFLAGS) \ + -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" + +libpipe_loader_static_la_SOURCES = \ $(COMMON_SOURCES) -if HAVE_DRM_LOADER_GALLIUM +libpipe_loader_dynamic_la_SOURCES = \ + $(COMMON_SOURCES) + +if HAVE_LIBDRM AM_CFLAGS += \ $(LIBDRM_CFLAGS) -libpipe_loader_la_SOURCES += \ +libpipe_loader_static_la_SOURCES += \ $(DRM_SOURCES) -libpipe_loader_la_LIBADD = \ - $(top_builddir)/src/loader/libloader.la +libpipe_loader_dynamic_la_SOURCES += \ + $(DRM_SOURCES) endif +libpipe_loader_static_la_LIBADD = \ + $(top_builddir)/src/loader/libloader.la + +libpipe_loader_dynamic_la_LIBADD = \ + $(top_builddir)/src/loader/libloader.la + +EXTRA_DIST = SConscript diff --git a/src/gallium/auxiliary/pipe-loader/SConscript b/src/gallium/auxiliary/pipe-loader/SConscript new file mode 100644 index 00000000000..c611fb892f8 --- /dev/null +++ b/src/gallium/auxiliary/pipe-loader/SConscript @@ -0,0 +1,33 @@ +Import('*') + +env = env.Clone() + +env.MSVC2008Compat() + +env.Append(CPPPATH = [ + '#/src/loader', + '#/src/gallium/winsys', +]) + +env.Append(CPPDEFINES = [ + ('HAVE_PIPE_LOADER_DRI', '1'), + ('DROP_PIPE_LOADER_MISC', '1'), + ('GALLIUM_STATIC_TARGETS', '1'), +]) + +source = env.ParseSourceList('Makefile.sources', 'COMMON_SOURCES') + +if env['HAVE_DRM']: + source += env.ParseSourceList('Makefile.sources', 'DRM_SOURCES') + + env.PkgUseModules('DRM') + env.Append(LIBS = [libloader]) + +pipe_loader = env.ConvenienceLibrary( + target = 'pipe_loader', + source = source, +) + +env.Alias('pipe_loader', pipe_loader) + +Export('pipe_loader') diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.c b/src/gallium/auxiliary/pipe-loader/pipe_loader.c index 8e79f853b0a..aef996c4617 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader.c +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.c @@ -32,10 +32,15 @@ #include 
"util/u_string.h" #include "util/u_dl.h" +#ifdef _MSC_VER +#include <stdlib.h> +#define PATH_MAX _MAX_PATH +#endif + #define MODULE_PREFIX "pipe_" static int (*backends[])(struct pipe_loader_device **, int) = { -#ifdef HAVE_PIPE_LOADER_DRM +#ifdef HAVE_LIBDRM &pipe_loader_drm_probe, #endif &pipe_loader_sw_probe @@ -69,10 +74,9 @@ pipe_loader_configuration(struct pipe_loader_device *dev, } struct pipe_screen * -pipe_loader_create_screen(struct pipe_loader_device *dev, - const char *library_paths) +pipe_loader_create_screen(struct pipe_loader_device *dev) { - return dev->ops->create_screen(dev, library_paths); + return dev->ops->create_screen(dev); } struct util_dl_library * diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.h b/src/gallium/auxiliary/pipe-loader/pipe_loader.h index 9b8712666bb..690d088ed82 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader.h +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.h @@ -82,13 +82,9 @@ pipe_loader_probe(struct pipe_loader_device **devs, int ndev); * Create a pipe_screen for the specified device. * * \param dev Device the screen will be created for. - * \param library_paths Colon-separated list of filesystem paths that - * will be used to look for the pipe driver - * module that handles this device. */ struct pipe_screen * -pipe_loader_create_screen(struct pipe_loader_device *dev, - const char *library_paths); +pipe_loader_create_screen(struct pipe_loader_device *dev); /** * Query the configuration parameters for the specified device. @@ -112,8 +108,6 @@ pipe_loader_configuration(struct pipe_loader_device *dev, void pipe_loader_release(struct pipe_loader_device **devs, int ndev); -#ifdef HAVE_PIPE_LOADER_DRI - /** * Initialize sw dri device give the drisw_loader_funcs. * @@ -125,7 +119,15 @@ bool pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_funcs *drisw_lf); -#endif +/** + * Initialize a kms backed sw device given an fd. + * + * This function is platform-specific. 
+ * + * \sa pipe_loader_probe + */ +bool +pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd); /** * Initialize a null sw device. @@ -158,8 +160,6 @@ boolean pipe_loader_sw_probe_wrapped(struct pipe_loader_device **dev, struct pipe_screen *screen); -#ifdef HAVE_PIPE_LOADER_DRM - /** * Get a list of known DRM devices. * @@ -180,8 +180,6 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev); bool pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd); -#endif - #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c index 1799df7e4c5..994a284385c 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c @@ -36,6 +36,7 @@ #include <unistd.h> #include "loader.h" +#include "target-helpers/drm_helper_public.h" #include "state_tracker/drm_driver.h" #include "pipe_loader_priv.h" @@ -50,13 +51,123 @@ struct pipe_loader_drm_device { struct pipe_loader_device base; + const struct drm_driver_descriptor *dd; +#ifndef GALLIUM_STATIC_TARGETS struct util_dl_library *lib; +#endif int fd; }; #define pipe_loader_drm_device(dev) ((struct pipe_loader_drm_device *)dev) -static struct pipe_loader_ops pipe_loader_drm_ops; +static const struct pipe_loader_ops pipe_loader_drm_ops; + +#ifdef GALLIUM_STATIC_TARGETS +static const struct drm_conf_ret throttle_ret = { + DRM_CONF_INT, + {2}, +}; + +static const struct drm_conf_ret share_fd_ret = { + DRM_CONF_BOOL, + {true}, +}; + +static inline const struct drm_conf_ret * +configuration_query(enum drm_conf conf) +{ + switch (conf) { + case DRM_CONF_THROTTLE: + return &throttle_ret; + case DRM_CONF_SHARE_FD: + return &share_fd_ret; + default: + break; + } + return NULL; +} + +static const struct drm_driver_descriptor driver_descriptors[] = { + { + .name = "i915", + .driver_name = "i915", + .create_screen = pipe_i915_create_screen, + .configuration = 
configuration_query, + }, +#ifdef USE_VC4_SIMULATOR + /* VC4 simulator and ILO (i965) are mutually exclusive (error at + * configure). As the latter is unconditionally added, keep this one above + * it. + */ + { + .name = "i965", + .driver_name = "vc4", + .create_screen = pipe_vc4_create_screen, + .configuration = configuration_query, + }, +#endif + { + .name = "i965", + .driver_name = "i915", + .create_screen = pipe_ilo_create_screen, + .configuration = configuration_query, + }, + { + .name = "nouveau", + .driver_name = "nouveau", + .create_screen = pipe_nouveau_create_screen, + .configuration = configuration_query, + }, + { + .name = "r300", + .driver_name = "radeon", + .create_screen = pipe_r300_create_screen, + .configuration = configuration_query, + }, + { + .name = "r600", + .driver_name = "radeon", + .create_screen = pipe_r600_create_screen, + .configuration = configuration_query, + }, + { + .name = "radeonsi", + .driver_name = "radeon", + .create_screen = pipe_radeonsi_create_screen, + .configuration = configuration_query, + }, + { + .name = "vmwgfx", + .driver_name = "vmwgfx", + .create_screen = pipe_vmwgfx_create_screen, + .configuration = configuration_query, + }, + { + .name = "kgsl", + .driver_name = "freedreno", + .create_screen = pipe_freedreno_create_screen, + .configuration = configuration_query, + }, + { + .name = "msm", + .driver_name = "freedreno", + .create_screen = pipe_freedreno_create_screen, + .configuration = configuration_query, + }, + { + .name = "virtio_gpu", + .driver_name = "virtio-gpu", + .create_screen = pipe_virgl_create_screen, + .configuration = configuration_query, + }, + { + .name = "vc4", + .driver_name = "vc4", + .create_screen = pipe_vc4_create_screen, + .configuration = configuration_query, + }, +}; +#endif bool pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd) @@ -81,10 +192,36 @@ pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd) if (!ddev->base.driver_name) goto fail; +#ifdef 
GALLIUM_STATIC_TARGETS + for (int i = 0; i < ARRAY_SIZE(driver_descriptors); i++) { + if (strcmp(driver_descriptors[i].name, ddev->base.driver_name) == 0) { + ddev->dd = &driver_descriptors[i]; + break; + } + } + if (!ddev->dd) + goto fail; +#else + ddev->lib = pipe_loader_find_module(&ddev->base, PIPE_SEARCH_DIR); + if (!ddev->lib) + goto fail; + + ddev->dd = (const struct drm_driver_descriptor *) + util_dl_get_proc_address(ddev->lib, "driver_descriptor"); + + /* sanity check on the name */ + if (!ddev->dd || strcmp(ddev->dd->name, ddev->base.driver_name) != 0) + goto fail; +#endif + *dev = &ddev->base; return true; fail: +#ifndef GALLIUM_STATIC_TARGETS + if (ddev->lib) + util_dl_close(ddev->lib); +#endif FREE(ddev); return false; } @@ -105,8 +242,9 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev) for (i = DRM_RENDER_NODE_MIN_MINOR, j = 0; i <= DRM_RENDER_NODE_MAX_MINOR; i++) { - fd = open_drm_render_node_minor(i); struct pipe_loader_device *dev; + + fd = open_drm_render_node_minor(i); if (fd < 0) continue; @@ -132,8 +270,10 @@ pipe_loader_drm_release(struct pipe_loader_device **dev) { struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(*dev); +#ifndef GALLIUM_STATIC_TARGETS if (ddev->lib) util_dl_close(ddev->lib); +#endif close(ddev->fd); FREE(ddev->base.driver_name); @@ -146,47 +286,22 @@ pipe_loader_drm_configuration(struct pipe_loader_device *dev, enum drm_conf conf) { struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(dev); - const struct drm_driver_descriptor *dd; - - if (!ddev->lib) - return NULL; - - dd = (const struct drm_driver_descriptor *) - util_dl_get_proc_address(ddev->lib, "driver_descriptor"); - /* sanity check on the name */ - if (!dd || strcmp(dd->name, ddev->base.driver_name) != 0) + if (!ddev->dd->configuration) return NULL; - if (!dd->configuration) - return NULL; - - return dd->configuration(conf); + return ddev->dd->configuration(conf); } static struct pipe_screen * 
-pipe_loader_drm_create_screen(struct pipe_loader_device *dev, - const char *library_paths) +pipe_loader_drm_create_screen(struct pipe_loader_device *dev) { struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(dev); - const struct drm_driver_descriptor *dd; - - if (!ddev->lib) - ddev->lib = pipe_loader_find_module(dev, library_paths); - if (!ddev->lib) - return NULL; - - dd = (const struct drm_driver_descriptor *) - util_dl_get_proc_address(ddev->lib, "driver_descriptor"); - - /* sanity check on the name */ - if (!dd || strcmp(dd->name, ddev->base.driver_name) != 0) - return NULL; - return dd->create_screen(ddev->fd); + return ddev->dd->create_screen(ddev->fd); } -static struct pipe_loader_ops pipe_loader_drm_ops = { +static const struct pipe_loader_ops pipe_loader_drm_ops = { .create_screen = pipe_loader_drm_create_screen, .configuration = pipe_loader_drm_configuration, .release = pipe_loader_drm_release diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h b/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h index d3b025221c5..da2ca8c6e1f 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h @@ -31,8 +31,7 @@ #include "pipe_loader.h" struct pipe_loader_ops { - struct pipe_screen *(*create_screen)(struct pipe_loader_device *dev, - const char *library_paths); + struct pipe_screen *(*create_screen)(struct pipe_loader_device *dev); const struct drm_conf_ret *(*configuration)(struct pipe_loader_device *dev, enum drm_conf conf); diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c index 6794930193d..5539a730b4c 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c @@ -30,45 +30,160 @@ #include "util/u_memory.h" #include "util/u_dl.h" #include "sw/dri/dri_sw_winsys.h" +#include "sw/kms-dri/kms_dri_sw_winsys.h" #include "sw/null/null_sw_winsys.h" 
#include "sw/wrapper/wrapper_sw_winsys.h" #include "target-helpers/inline_sw_helper.h" #include "state_tracker/drisw_api.h" +#include "state_tracker/sw_driver.h" struct pipe_loader_sw_device { struct pipe_loader_device base; + const struct sw_driver_descriptor *dd; +#ifndef GALLIUM_STATIC_TARGETS struct util_dl_library *lib; +#endif struct sw_winsys *ws; }; #define pipe_loader_sw_device(dev) ((struct pipe_loader_sw_device *)dev) -static struct pipe_loader_ops pipe_loader_sw_ops; +static const struct pipe_loader_ops pipe_loader_sw_ops; -static struct sw_winsys *(*backends[])() = { - null_sw_create +#ifdef GALLIUM_STATIC_TARGETS +static const struct sw_driver_descriptor driver_descriptors = { + .create_screen = sw_screen_create, + .winsys = { +#ifdef HAVE_PIPE_LOADER_DRI + { + .name = "dri", + .create_winsys = dri_create_sw_winsys, + }, +#endif +#ifdef HAVE_PIPE_LOADER_KMS + { + .name = "kms_dri", + .create_winsys = kms_dri_create_winsys, + }, +#endif +/** + * XXX: Do not include these two for non autotools builds. + * They don't have neither opencl nor nine, where these are used. 
+ */ +#ifndef DROP_PIPE_LOADER_MISC + { + .name = "null", + .create_winsys = null_sw_create, + }, + { + .name = "wrapped", + .create_winsys = wrapper_sw_winsys_wrap_pipe_screen, + }, +#endif + { 0 }, + } }; +#endif + +static bool +pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev) +{ + sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE; + sdev->base.driver_name = "swrast"; + sdev->base.ops = &pipe_loader_sw_ops; + +#ifdef GALLIUM_STATIC_TARGETS + sdev->dd = &driver_descriptors; + if (!sdev->dd) + return false; +#else + sdev->lib = pipe_loader_find_module(&sdev->base, PIPE_SEARCH_DIR); + if (!sdev->lib) + return false; + + sdev->dd = (const struct sw_driver_descriptor *) + util_dl_get_proc_address(sdev->lib, "swrast_driver_descriptor"); + + if (!sdev->dd){ + util_dl_close(sdev->lib); + sdev->lib = NULL; + return false; + } +#endif + + return true; +} + +static void +pipe_loader_sw_probe_teardown_common(struct pipe_loader_sw_device *sdev) +{ +#ifndef GALLIUM_STATIC_TARGETS + if (sdev->lib) + util_dl_close(sdev->lib); +#endif +} #ifdef HAVE_PIPE_LOADER_DRI bool pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_funcs *drisw_lf) { struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device); + int i; if (!sdev) return false; - sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE; - sdev->base.driver_name = "swrast"; - sdev->base.ops = &pipe_loader_sw_ops; - sdev->ws = dri_create_sw_winsys(drisw_lf); - if (!sdev->ws) { - FREE(sdev); - return false; + if (!pipe_loader_sw_probe_init_common(sdev)) + goto fail; + + for (i = 0; sdev->dd->winsys; i++) { + if (strcmp(sdev->dd->winsys[i].name, "dri") == 0) { + sdev->ws = sdev->dd->winsys[i].create_winsys(drisw_lf); + break; + } } + if (!sdev->ws) + goto fail; + *devs = &sdev->base; + return true; + +fail: + pipe_loader_sw_probe_teardown_common(sdev); + FREE(sdev); + return false; +} +#endif + +#ifdef HAVE_PIPE_LOADER_KMS +bool +pipe_loader_sw_probe_kms(struct pipe_loader_device 
**devs, int fd) +{ + struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device); + int i; + if (!sdev) + return false; + + if (!pipe_loader_sw_probe_init_common(sdev)) + goto fail; + + for (i = 0; sdev->dd->winsys; i++) { + if (strcmp(sdev->dd->winsys[i].name, "kms_dri") == 0) { + sdev->ws = sdev->dd->winsys[i].create_winsys(fd); + break; + } + } + if (!sdev->ws) + goto fail; + + *devs = &sdev->base; return true; + +fail: + pipe_loader_sw_probe_teardown_common(sdev); + FREE(sdev); + return false; } #endif @@ -76,38 +191,40 @@ bool pipe_loader_sw_probe_null(struct pipe_loader_device **devs) { struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device); + int i; if (!sdev) return false; - sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE; - sdev->base.driver_name = "swrast"; - sdev->base.ops = &pipe_loader_sw_ops; - sdev->ws = null_sw_create(); - if (!sdev->ws) { - FREE(sdev); - return false; + if (!pipe_loader_sw_probe_init_common(sdev)) + goto fail; + + for (i = 0; sdev->dd->winsys; i++) { + if (strcmp(sdev->dd->winsys[i].name, "null") == 0) { + sdev->ws = sdev->dd->winsys[i].create_winsys(); + break; + } } - *devs = &sdev->base; + if (!sdev->ws) + goto fail; + *devs = &sdev->base; return true; + +fail: + pipe_loader_sw_probe_teardown_common(sdev); + FREE(sdev); + return false; } int pipe_loader_sw_probe(struct pipe_loader_device **devs, int ndev) { - int i; - - for (i = 0; i < Elements(backends); i++) { - if (i < ndev) { - struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device); - /* TODO: handle CALLOC_STRUCT failure */ + int i = 1; - sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE; - sdev->base.driver_name = "swrast"; - sdev->base.ops = &pipe_loader_sw_ops; - sdev->ws = backends[i](); - devs[i] = &sdev->base; + if (i < ndev) { + if (!pipe_loader_sw_probe_null(devs)) { + i--; } } @@ -119,21 +236,30 @@ pipe_loader_sw_probe_wrapped(struct pipe_loader_device **dev, struct pipe_screen *screen) { struct pipe_loader_sw_device 
*sdev = CALLOC_STRUCT(pipe_loader_sw_device); + int i; if (!sdev) return false; - sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE; - sdev->base.driver_name = "swrast"; - sdev->base.ops = &pipe_loader_sw_ops; - sdev->ws = wrapper_sw_winsys_wrap_pipe_screen(screen); + if (!pipe_loader_sw_probe_init_common(sdev)) + goto fail; - if (!sdev->ws) { - FREE(sdev); - return false; + for (i = 0; sdev->dd->winsys; i++) { + if (strcmp(sdev->dd->winsys[i].name, "wrapped") == 0) { + sdev->ws = sdev->dd->winsys[i].create_winsys(screen); + break; + } } + if (!sdev->ws) + goto fail; + *dev = &sdev->base; return true; + +fail: + pipe_loader_sw_probe_teardown_common(sdev); + FREE(sdev); + return false; } static void @@ -141,8 +267,10 @@ pipe_loader_sw_release(struct pipe_loader_device **dev) { struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(*dev); +#ifndef GALLIUM_STATIC_TARGETS if (sdev->lib) util_dl_close(sdev->lib); +#endif FREE(sdev); *dev = NULL; @@ -156,28 +284,19 @@ pipe_loader_sw_configuration(struct pipe_loader_device *dev, } static struct pipe_screen * -pipe_loader_sw_create_screen(struct pipe_loader_device *dev, - const char *library_paths) +pipe_loader_sw_create_screen(struct pipe_loader_device *dev) { struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(dev); - struct pipe_screen *(*init)(struct sw_winsys *); + struct pipe_screen *screen; - if (!sdev->lib) - sdev->lib = pipe_loader_find_module(dev, library_paths); - if (!sdev->lib) - return NULL; - - init = (void *)util_dl_get_proc_address(sdev->lib, "swrast_create_screen"); - if (!init){ - util_dl_close(sdev->lib); - sdev->lib = NULL; - return NULL; - } + screen = sdev->dd->create_screen(sdev->ws); + if (!screen) + sdev->ws->destroy(sdev->ws); - return init(sdev->ws); + return screen; } -static struct pipe_loader_ops pipe_loader_sw_ops = { +static const struct pipe_loader_ops pipe_loader_sw_ops = { .create_screen = pipe_loader_sw_create_screen, .configuration = pipe_loader_sw_configuration, .release = 
pipe_loader_sw_release diff --git a/src/gallium/auxiliary/target-helpers/drm_helper.h b/src/gallium/auxiliary/target-helpers/drm_helper.h new file mode 100644 index 00000000000..332b1cba984 --- /dev/null +++ b/src/gallium/auxiliary/target-helpers/drm_helper.h @@ -0,0 +1,275 @@ +#ifndef DRM_HELPER_H +#define DRM_HELPER_H + +#include <stdio.h> +#include "target-helpers/inline_debug_helper.h" +#include "target-helpers/drm_helper_public.h" + +#ifdef GALLIUM_I915 +#include "i915/drm/i915_drm_public.h" +#include "i915/i915_public.h" + +struct pipe_screen * +pipe_i915_create_screen(int fd) +{ + struct i915_winsys *iws; + struct pipe_screen *screen; + + iws = i915_drm_winsys_create(fd); + if (!iws) + return NULL; + + screen = i915_screen_create(iws); + return screen ? debug_screen_wrap(screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_i915_create_screen(int fd) +{ + fprintf(stderr, "i915g: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_ILO +#include "intel/drm/intel_drm_public.h" +#include "ilo/ilo_public.h" + +struct pipe_screen * +pipe_ilo_create_screen(int fd) +{ + struct intel_winsys *iws; + struct pipe_screen *screen; + + iws = intel_winsys_create_for_fd(fd); + if (!iws) + return NULL; + + screen = ilo_screen_create(iws); + return screen ? debug_screen_wrap(screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_ilo_create_screen(int fd) +{ + fprintf(stderr, "ilo: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_NOUVEAU +#include "nouveau/drm/nouveau_drm_public.h" + +struct pipe_screen * +pipe_nouveau_create_screen(int fd) +{ + struct pipe_screen *screen; + + screen = nouveau_drm_screen_create(fd); + return screen ? 
debug_screen_wrap(screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_nouveau_create_screen(int fd) +{ + fprintf(stderr, "nouveau: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_R300 +#include "radeon/radeon_winsys.h" +#include "radeon/drm/radeon_drm_public.h" +#include "r300/r300_public.h" + +struct pipe_screen * +pipe_r300_create_screen(int fd) +{ + struct radeon_winsys *rw; + + rw = radeon_drm_winsys_create(fd, r300_screen_create); + return rw ? debug_screen_wrap(rw->screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_r300_create_screen(int fd) +{ + fprintf(stderr, "r300: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_R600 +#include "radeon/radeon_winsys.h" +#include "radeon/drm/radeon_drm_public.h" +#include "r600/r600_public.h" + +struct pipe_screen * +pipe_r600_create_screen(int fd) +{ + struct radeon_winsys *rw; + + rw = radeon_drm_winsys_create(fd, r600_screen_create); + return rw ? debug_screen_wrap(rw->screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_r600_create_screen(int fd) +{ + fprintf(stderr, "r600: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_RADEONSI +#include "radeon/radeon_winsys.h" +#include "radeon/drm/radeon_drm_public.h" +#include "amdgpu/drm/amdgpu_public.h" +#include "radeonsi/si_public.h" + +struct pipe_screen * +pipe_radeonsi_create_screen(int fd) +{ + struct radeon_winsys *rw; + + /* First, try amdgpu. */ + rw = amdgpu_winsys_create(fd, radeonsi_screen_create); + + if (!rw) + rw = radeon_drm_winsys_create(fd, radeonsi_screen_create); + + return rw ? 
debug_screen_wrap(rw->screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_radeonsi_create_screen(int fd) +{ + fprintf(stderr, "radeonsi: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_VMWGFX +#include "svga/drm/svga_drm_public.h" +#include "svga/svga_public.h" + +struct pipe_screen * +pipe_vmwgfx_create_screen(int fd) +{ + struct svga_winsys_screen *sws; + struct pipe_screen *screen; + + sws = svga_drm_winsys_screen_create(fd); + if (!sws) + return NULL; + + screen = svga_screen_create(sws); + return screen ? debug_screen_wrap(screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_vmwgfx_create_screen(int fd) +{ + fprintf(stderr, "svga: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_FREEDRENO +#include "freedreno/drm/freedreno_drm_public.h" + +struct pipe_screen * +pipe_freedreno_create_screen(int fd) +{ + struct pipe_screen *screen; + + screen = fd_drm_screen_create(fd); + return screen ? debug_screen_wrap(screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_freedreno_create_screen(int fd) +{ + fprintf(stderr, "freedreno: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_VIRGL +#include "virgl/drm/virgl_drm_public.h" +#include "virgl/virgl_public.h" + +struct pipe_screen * +pipe_virgl_create_screen(int fd) +{ + struct virgl_winsys *vws; + struct pipe_screen *screen; + + vws = virgl_drm_winsys_create(fd); + if (!vws) + return NULL; + + screen = virgl_create_screen(vws); + return screen ? debug_screen_wrap(screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_virgl_create_screen(int fd) +{ + fprintf(stderr, "virgl: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_VC4 +#include "vc4/drm/vc4_drm_public.h" + +struct pipe_screen * +pipe_vc4_create_screen(int fd) +{ + struct pipe_screen *screen; + + screen = vc4_drm_screen_create(fd); + return screen ? 
debug_screen_wrap(screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_vc4_create_screen(int fd) +{ + fprintf(stderr, "vc4: driver missing\n"); + return NULL; +} + +#endif + + +#endif /* DRM_HELPER_H */ diff --git a/src/gallium/auxiliary/target-helpers/drm_helper_public.h b/src/gallium/auxiliary/target-helpers/drm_helper_public.h new file mode 100644 index 00000000000..d1f9382a6f9 --- /dev/null +++ b/src/gallium/auxiliary/target-helpers/drm_helper_public.h @@ -0,0 +1,37 @@ +#ifndef _DRM_HELPER_PUBLIC_H +#define _DRM_HELPER_PUBLIC_H + + +struct pipe_screen; + +struct pipe_screen * +pipe_i915_create_screen(int fd); + +struct pipe_screen * +pipe_ilo_create_screen(int fd); + +struct pipe_screen * +pipe_nouveau_create_screen(int fd); + +struct pipe_screen * +pipe_r300_create_screen(int fd); + +struct pipe_screen * +pipe_r600_create_screen(int fd); + +struct pipe_screen * +pipe_radeonsi_create_screen(int fd); + +struct pipe_screen * +pipe_vmwgfx_create_screen(int fd); + +struct pipe_screen * +pipe_freedreno_create_screen(int fd); + +struct pipe_screen * +pipe_virgl_create_screen(int fd); + +struct pipe_screen * +pipe_vc4_create_screen(int fd); + +#endif /* _DRM_HELPER_PUBLIC_H */ diff --git a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h deleted file mode 100644 index 6ca4dc8136c..00000000000 --- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h +++ /dev/null @@ -1,531 +0,0 @@ -#ifndef INLINE_DRM_HELPER_H -#define INLINE_DRM_HELPER_H - -#include "state_tracker/drm_driver.h" -#include "target-helpers/inline_debug_helper.h" -#include "loader.h" -#if defined(DRI_TARGET) -#include "dri_screen.h" -#endif - -#if GALLIUM_SOFTPIPE -#include "target-helpers/inline_sw_helper.h" -#include "sw/kms-dri/kms_dri_sw_winsys.h" -#endif - -#if GALLIUM_I915 -#include "i915/drm/i915_drm_public.h" -#include "i915/i915_public.h" -#endif - -#if GALLIUM_ILO -#include "intel/drm/intel_drm_public.h" -#include 
"ilo/ilo_public.h" -#endif - -#if GALLIUM_NOUVEAU -#include "nouveau/drm/nouveau_drm_public.h" -#endif - -#if GALLIUM_R300 -#include "radeon/radeon_winsys.h" -#include "radeon/drm/radeon_drm_public.h" -#include "r300/r300_public.h" -#endif - -#if GALLIUM_R600 -#include "radeon/radeon_winsys.h" -#include "radeon/drm/radeon_drm_public.h" -#include "r600/r600_public.h" -#endif - -#if GALLIUM_RADEONSI -#include "radeon/radeon_winsys.h" -#include "radeon/drm/radeon_drm_public.h" -#include "amdgpu/drm/amdgpu_public.h" -#include "radeonsi/si_public.h" -#endif - -#if GALLIUM_VMWGFX -#include "svga/drm/svga_drm_public.h" -#include "svga/svga_public.h" -#endif - -#if GALLIUM_FREEDRENO -#include "freedreno/drm/freedreno_drm_public.h" -#endif - -#if GALLIUM_VC4 -#include "vc4/drm/vc4_drm_public.h" -#endif - -#if GALLIUM_VIRGL -#include "virgl/drm/virgl_drm_public.h" -#include "virgl/virgl_public.h" -#endif - -static char* driver_name = NULL; - -/* XXX: We need to teardown the winsys if *screen_create() fails. */ - -#if defined(GALLIUM_SOFTPIPE) -#if defined(DRI_TARGET) -#if defined(HAVE_LIBDRM) - -const __DRIextension **__driDriverGetExtensions_kms_swrast(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_kms_swrast(void) -{ - globalDriverAPI = &dri_kms_driver_api; - return galliumdrm_driver_extensions; -} - -struct pipe_screen * -kms_swrast_create_screen(int fd) -{ - struct sw_winsys *sws; - struct pipe_screen *screen; - - sws = kms_dri_create_winsys(fd); - if (!sws) - return NULL; - - screen = sw_screen_create(sws); - return screen ? 
debug_screen_wrap(screen) : NULL; -} -#endif -#endif -#endif - -#if defined(GALLIUM_I915) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_i915(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_i915(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_i915_create_screen(int fd) -{ - struct i915_winsys *iws; - struct pipe_screen *screen; - - iws = i915_drm_winsys_create(fd); - if (!iws) - return NULL; - - screen = i915_screen_create(iws); - return screen ? debug_screen_wrap(screen) : NULL; -} -#endif - -#if defined(GALLIUM_ILO) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_i965(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_ilo_create_screen(int fd) -{ - struct intel_winsys *iws; - struct pipe_screen *screen; - - iws = intel_winsys_create_for_fd(fd); - if (!iws) - return NULL; - - screen = ilo_screen_create(iws); - return screen ? debug_screen_wrap(screen) : NULL; -} -#endif - -#if defined(GALLIUM_NOUVEAU) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_nouveau(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_nouveau(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_nouveau_create_screen(int fd) -{ - struct pipe_screen *screen; - - screen = nouveau_drm_screen_create(fd); - return screen ? 
debug_screen_wrap(screen) : NULL; -} -#endif - -#if defined(GALLIUM_R300) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_r300(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_r300(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_r300_create_screen(int fd) -{ - struct radeon_winsys *rw; - - rw = radeon_drm_winsys_create(fd, r300_screen_create); - return rw ? debug_screen_wrap(rw->screen) : NULL; -} -#endif - -#if defined(GALLIUM_R600) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_r600(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_r600(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_r600_create_screen(int fd) -{ - struct radeon_winsys *rw; - - rw = radeon_drm_winsys_create(fd, r600_screen_create); - return rw ? debug_screen_wrap(rw->screen) : NULL; -} -#endif - -#if defined(GALLIUM_RADEONSI) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_radeonsi(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_radeonsi(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_radeonsi_create_screen(int fd) -{ - struct radeon_winsys *rw; - - /* First, try amdgpu. */ - rw = amdgpu_winsys_create(fd, radeonsi_screen_create); - - if (!rw) - rw = radeon_drm_winsys_create(fd, radeonsi_screen_create); - - return rw ? 
debug_screen_wrap(rw->screen) : NULL; -} -#endif - -#if defined(GALLIUM_VMWGFX) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_vmwgfx(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_vmwgfx(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_vmwgfx_create_screen(int fd) -{ - struct svga_winsys_screen *sws; - struct pipe_screen *screen; - - sws = svga_drm_winsys_screen_create(fd); - if (!sws) - return NULL; - - screen = svga_screen_create(sws); - return screen ? debug_screen_wrap(screen) : NULL; -} -#endif - -#if defined(GALLIUM_FREEDRENO) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_msm(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_msm(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} - -const __DRIextension **__driDriverGetExtensions_kgsl(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_kgsl(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_freedreno_create_screen(int fd) -{ - struct pipe_screen *screen; - - screen = fd_drm_screen_create(fd); - return screen ? debug_screen_wrap(screen) : NULL; -} -#endif - -#if defined(GALLIUM_VIRGL) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_virtio_gpu(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_virtio_gpu(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_virgl_create_screen(int fd) -{ - struct virgl_winsys *vws; - struct pipe_screen *screen; - - vws = virgl_drm_winsys_create(fd); - if (!vws) - return NULL; - - screen = virgl_create_screen(vws); - return screen ? 
debug_screen_wrap(screen) : NULL; -} -#endif - -#if defined(GALLIUM_VC4) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_vc4(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_vc4(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} - -#if defined(USE_VC4_SIMULATOR) -const __DRIextension **__driDriverGetExtensions_i965(void); - -/** - * When building using the simulator (on x86), we advertise ourselves as the - * i965 driver so that you can just make a directory with a link from - * i965_dri.so to the built vc4_dri.so, and point LIBGL_DRIVERS_PATH to that - * on your i965-using host to run the driver under simulation. - * - * This is, of course, incompatible with building with the ilo driver, but you - * shouldn't be building that anyway. - */ -PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -#endif - -static struct pipe_screen * -pipe_vc4_create_screen(int fd) -{ - struct pipe_screen *screen; - - screen = vc4_drm_screen_create(fd); - return screen ? 
debug_screen_wrap(screen) : NULL; -} -#endif - -inline struct pipe_screen * -dd_create_screen(int fd) -{ - driver_name = loader_get_driver_for_fd(fd, _LOADER_GALLIUM); - if (!driver_name) - return NULL; - -#if defined(GALLIUM_I915) - if (strcmp(driver_name, "i915") == 0) - return pipe_i915_create_screen(fd); - else -#endif -#if defined(GALLIUM_ILO) - if (strcmp(driver_name, "i965") == 0) - return pipe_ilo_create_screen(fd); - else -#endif -#if defined(GALLIUM_NOUVEAU) - if (strcmp(driver_name, "nouveau") == 0) - return pipe_nouveau_create_screen(fd); - else -#endif -#if defined(GALLIUM_R300) - if (strcmp(driver_name, "r300") == 0) - return pipe_r300_create_screen(fd); - else -#endif -#if defined(GALLIUM_R600) - if (strcmp(driver_name, "r600") == 0) - return pipe_r600_create_screen(fd); - else -#endif -#if defined(GALLIUM_RADEONSI) - if (strcmp(driver_name, "radeonsi") == 0) - return pipe_radeonsi_create_screen(fd); - else -#endif -#if defined(GALLIUM_VMWGFX) - if (strcmp(driver_name, "vmwgfx") == 0) - return pipe_vmwgfx_create_screen(fd); - else -#endif -#if defined(GALLIUM_FREEDRENO) - if ((strcmp(driver_name, "kgsl") == 0) || (strcmp(driver_name, "msm") == 0)) - return pipe_freedreno_create_screen(fd); - else -#endif -#if defined(GALLIUM_VIRGL) - if ((strcmp(driver_name, "virtio_gpu") == 0)) - return pipe_virgl_create_screen(fd); - else -#endif -#if defined(GALLIUM_VC4) - if (strcmp(driver_name, "vc4") == 0) - return pipe_vc4_create_screen(fd); - else -#if defined(USE_VC4_SIMULATOR) - if (strcmp(driver_name, "i965") == 0) - return pipe_vc4_create_screen(fd); - else -#endif -#endif - return NULL; -} - -inline const char * -dd_driver_name(void) -{ - return driver_name; -} - -static const struct drm_conf_ret throttle_ret = { - DRM_CONF_INT, - {2}, -}; - -static const struct drm_conf_ret share_fd_ret = { - DRM_CONF_BOOL, - {true}, -}; - -static inline const struct drm_conf_ret * -configuration_query(enum drm_conf conf) -{ - switch (conf) { - case DRM_CONF_THROTTLE: - 
return &throttle_ret; - case DRM_CONF_SHARE_FD: - return &share_fd_ret; - default: - break; - } - return NULL; -} - -inline const struct drm_conf_ret * -dd_configuration(enum drm_conf conf) -{ - if (!driver_name) - return NULL; - -#if defined(GALLIUM_I915) - if (strcmp(driver_name, "i915") == 0) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_ILO) - if (strcmp(driver_name, "i965") == 0) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_NOUVEAU) - if (strcmp(driver_name, "nouveau") == 0) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_R300) - if (strcmp(driver_name, "r300") == 0) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_R600) - if (strcmp(driver_name, "r600") == 0) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_RADEONSI) - if (strcmp(driver_name, "radeonsi") == 0) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_VMWGFX) - if (strcmp(driver_name, "vmwgfx") == 0) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_FREEDRENO) - if ((strcmp(driver_name, "kgsl") == 0) || (strcmp(driver_name, "msm") == 0)) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_VIRGL) - if ((strcmp(driver_name, "virtio_gpu") == 0)) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_VC4) - if (strcmp(driver_name, "vc4") == 0) - return configuration_query(conf); - else -#if defined(USE_VC4_SIMULATOR) - if (strcmp(driver_name, "i965") == 0) - return configuration_query(conf); - else -#endif -#endif - return NULL; -} -#endif /* INLINE_DRM_HELPER_H */ diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h index f3693fb1f39..a9ab16f2b54 100644 --- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h @@ -69,69 +69,4 @@ sw_screen_create(struct sw_winsys 
*winsys) return sw_screen_create_named(winsys, driver); } -#if defined(GALLIUM_SOFTPIPE) -#if defined(DRI_TARGET) -#include "target-helpers/inline_debug_helper.h" -#include "sw/dri/dri_sw_winsys.h" -#include "dri_screen.h" - -const __DRIextension **__driDriverGetExtensions_swrast(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_swrast(void) -{ - globalDriverAPI = &galliumsw_driver_api; - return galliumsw_driver_extensions; -} - -inline struct pipe_screen * -drisw_create_screen(struct drisw_loader_funcs *lf) -{ - struct sw_winsys *winsys = NULL; - struct pipe_screen *screen = NULL; - - winsys = dri_create_sw_winsys(lf); - if (winsys == NULL) - return NULL; - - screen = sw_screen_create(winsys); - if (screen == NULL) { - winsys->destroy(winsys); - return NULL; - } - - screen = debug_screen_wrap(screen); - return screen; -} -#endif // DRI_TARGET - -#if defined(NINE_TARGET) -#include "sw/wrapper/wrapper_sw_winsys.h" -#include "target-helpers/inline_debug_helper.h" - -extern struct pipe_screen *ninesw_create_screen(struct pipe_screen *screen); - -inline struct pipe_screen * -ninesw_create_screen(struct pipe_screen *pscreen) -{ - struct sw_winsys *winsys = NULL; - struct pipe_screen *screen = NULL; - - winsys = wrapper_sw_winsys_wrap_pipe_screen(pscreen); - if (winsys == NULL) - return NULL; - - screen = sw_screen_create(winsys); - if (screen == NULL) { - winsys->destroy(winsys); - return NULL; - } - - screen = debug_screen_wrap(screen); - return screen; -} -#endif // NINE_TARGET - -#endif // GALLIUM_SOFTPIPE - - #endif diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c index aca435d6cad..9b97d8dc4b9 100644 --- a/src/gallium/auxiliary/util/u_dl.c +++ b/src/gallium/auxiliary/util/u_dl.c @@ -45,7 +45,7 @@ struct util_dl_library * util_dl_open(const char *filename) { #if defined(PIPE_OS_UNIX) - return (struct util_dl_library *)dlopen(filename, RTLD_LAZY | RTLD_GLOBAL); + return (struct util_dl_library *)dlopen(filename, 
RTLD_LAZY | RTLD_LOCAL); #elif defined(PIPE_OS_WINDOWS) return (struct util_dl_library *)LoadLibraryA(filename); #else diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index d3b77e6b99b..c26d7331d4c 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -202,6 +202,36 @@ PIPE_FORMAT_BPTC_SRGBA , bptc, 4, 4, x128, , , , xyzw, sr PIPE_FORMAT_BPTC_RGB_FLOAT , bptc, 4, 4, x128, , , , xyz1, rgb PIPE_FORMAT_BPTC_RGB_UFLOAT , bptc, 4, 4, x128, , , , xyz1, rgb +PIPE_FORMAT_ASTC_4x4 , astc, 4, 4, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_5x4 , astc, 5, 4, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_5x5 , astc, 5, 5, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_6x5 , astc, 6, 5, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_6x6 , astc, 6, 6, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_8x5 , astc, 8, 5, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_8x6 , astc, 8, 6, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_8x8 , astc, 8, 8, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_10x5 , astc,10, 5, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_10x6 , astc,10, 6, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_10x8 , astc,10, 8, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_10x10 , astc,10,10, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_12x10 , astc,12,10, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_12x12 , astc,12,12, x128, , , , xyzw, rgb + +PIPE_FORMAT_ASTC_4x4_SRGB , astc, 4, 4, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_5x4_SRGB , astc, 5, 4, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_5x5_SRGB , astc, 5, 5, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_6x5_SRGB , astc, 6, 5, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_6x6_SRGB , astc, 6, 6, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_8x5_SRGB , astc, 8, 5, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_8x6_SRGB , astc, 8, 6, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_8x8_SRGB , astc, 8, 8, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_10x5_SRGB , astc,10, 5, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_10x6_SRGB , astc,10, 6, x128, , 
, , xyzw, srgb +PIPE_FORMAT_ASTC_10x8_SRGB , astc,10, 8, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_10x10_SRGB , astc,10,10, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_12x10_SRGB , astc,12,10, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_12x12_SRGB , astc,12,12, x128, , , , xyzw, srgb + # Straightforward D3D10-like formats (also used for # vertex buffer element description) # diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index a1b1b28fa41..ffdb864fa83 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -84,9 +84,14 @@ enum util_format_layout { UTIL_FORMAT_LAYOUT_BPTC = 7, /** + * ASTC + */ + UTIL_FORMAT_LAYOUT_ASTC = 8, + + /** * Everything else that doesn't fit in any of the above layouts. */ - UTIL_FORMAT_LAYOUT_OTHER = 8 + UTIL_FORMAT_LAYOUT_OTHER = 9 }; @@ -481,6 +486,7 @@ util_format_is_compressed(enum pipe_format format) case UTIL_FORMAT_LAYOUT_RGTC: case UTIL_FORMAT_LAYOUT_ETC: case UTIL_FORMAT_LAYOUT_BPTC: + case UTIL_FORMAT_LAYOUT_ASTC: /* XXX add other formats in the future */ return TRUE; default: @@ -924,6 +930,35 @@ util_format_srgb(enum pipe_format format) return PIPE_FORMAT_B5G6R5_SRGB; case PIPE_FORMAT_BPTC_RGBA_UNORM: return PIPE_FORMAT_BPTC_SRGBA; + case PIPE_FORMAT_ASTC_4x4: + return PIPE_FORMAT_ASTC_4x4_SRGB; + case PIPE_FORMAT_ASTC_5x4: + return PIPE_FORMAT_ASTC_5x4_SRGB; + case PIPE_FORMAT_ASTC_5x5: + return PIPE_FORMAT_ASTC_5x5_SRGB; + case PIPE_FORMAT_ASTC_6x5: + return PIPE_FORMAT_ASTC_6x5_SRGB; + case PIPE_FORMAT_ASTC_6x6: + return PIPE_FORMAT_ASTC_6x6_SRGB; + case PIPE_FORMAT_ASTC_8x5: + return PIPE_FORMAT_ASTC_8x5_SRGB; + case PIPE_FORMAT_ASTC_8x6: + return PIPE_FORMAT_ASTC_8x6_SRGB; + case PIPE_FORMAT_ASTC_8x8: + return PIPE_FORMAT_ASTC_8x8_SRGB; + case PIPE_FORMAT_ASTC_10x5: + return PIPE_FORMAT_ASTC_10x5_SRGB; + case PIPE_FORMAT_ASTC_10x6: + return PIPE_FORMAT_ASTC_10x6_SRGB; + case PIPE_FORMAT_ASTC_10x8: + return PIPE_FORMAT_ASTC_10x8_SRGB; + case 
PIPE_FORMAT_ASTC_10x10: + return PIPE_FORMAT_ASTC_10x10_SRGB; + case PIPE_FORMAT_ASTC_12x10: + return PIPE_FORMAT_ASTC_12x10_SRGB; + case PIPE_FORMAT_ASTC_12x12: + return PIPE_FORMAT_ASTC_12x12_SRGB; + default: return PIPE_FORMAT_NONE; } @@ -971,6 +1006,34 @@ util_format_linear(enum pipe_format format) return PIPE_FORMAT_B5G6R5_UNORM; case PIPE_FORMAT_BPTC_SRGBA: return PIPE_FORMAT_BPTC_RGBA_UNORM; + case PIPE_FORMAT_ASTC_4x4_SRGB: + return PIPE_FORMAT_ASTC_4x4; + case PIPE_FORMAT_ASTC_5x4_SRGB: + return PIPE_FORMAT_ASTC_5x4; + case PIPE_FORMAT_ASTC_5x5_SRGB: + return PIPE_FORMAT_ASTC_5x5; + case PIPE_FORMAT_ASTC_6x5_SRGB: + return PIPE_FORMAT_ASTC_6x5; + case PIPE_FORMAT_ASTC_6x6_SRGB: + return PIPE_FORMAT_ASTC_6x6; + case PIPE_FORMAT_ASTC_8x5_SRGB: + return PIPE_FORMAT_ASTC_8x5; + case PIPE_FORMAT_ASTC_8x6_SRGB: + return PIPE_FORMAT_ASTC_8x6; + case PIPE_FORMAT_ASTC_8x8_SRGB: + return PIPE_FORMAT_ASTC_8x8; + case PIPE_FORMAT_ASTC_10x5_SRGB: + return PIPE_FORMAT_ASTC_10x5; + case PIPE_FORMAT_ASTC_10x6_SRGB: + return PIPE_FORMAT_ASTC_10x6; + case PIPE_FORMAT_ASTC_10x8_SRGB: + return PIPE_FORMAT_ASTC_10x8; + case PIPE_FORMAT_ASTC_10x10_SRGB: + return PIPE_FORMAT_ASTC_10x10; + case PIPE_FORMAT_ASTC_12x10_SRGB: + return PIPE_FORMAT_ASTC_12x10; + case PIPE_FORMAT_ASTC_12x12_SRGB: + return PIPE_FORMAT_ASTC_12x12; default: return format; } diff --git a/src/gallium/auxiliary/util/u_format_fake.c b/src/gallium/auxiliary/util/u_format_fake.c deleted file mode 100644 index 77e896d27bd..00000000000 --- a/src/gallium/auxiliary/util/u_format_fake.c +++ /dev/null @@ -1,37 +0,0 @@ -#include "u_format.h" -#include "u_format_fake.h" - -#define fake(format) \ -void \ -util_format_##format##_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) {assert(0);} \ -\ -void \ -util_format_##format##_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \ -\ -void \ 
-util_format_##format##_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \ -\ -void \ -util_format_##format##_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \ -\ -void \ -util_format_##format##_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \ -\ -void \ -util_format_##format##_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) {assert(0);} - -fake(bptc_rgba_unorm) -fake(bptc_srgba) -fake(bptc_rgb_float) -fake(bptc_rgb_ufloat) - -fake(etc2_rgb8) -fake(etc2_srgb8) -fake(etc2_rgb8a1) -fake(etc2_srgb8a1) -fake(etc2_rgba8) -fake(etc2_srgba8) -fake(etc2_r11_unorm) -fake(etc2_r11_snorm) -fake(etc2_rg11_unorm) -fake(etc2_rg11_snorm) diff --git a/src/gallium/auxiliary/util/u_format_fake.h b/src/gallium/auxiliary/util/u_format_fake.h deleted file mode 100644 index e6bfd4e1594..00000000000 --- a/src/gallium/auxiliary/util/u_format_fake.h +++ /dev/null @@ -1,66 +0,0 @@ -/************************************************************************** - * - * Copyright 2011 Red Hat Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - **************************************************************************/ - -#ifndef U_FORMAT_FAKE_H_ -#define U_FORMAT_FAKE_H_ - -#define __format_fake(format) \ -void \ -util_format_##format##_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); \ -\ -void \ -util_format_##format##_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); \ -\ -void \ -util_format_##format##_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); \ -\ -void \ -util_format_##format##_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); \ -\ -void \ -util_format_##format##_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); \ -\ -void \ -util_format_##format##_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); - -__format_fake(bptc_rgba_unorm) -__format_fake(bptc_srgba) -__format_fake(bptc_rgb_float) -__format_fake(bptc_rgb_ufloat) - -__format_fake(etc2_rgb8) -__format_fake(etc2_srgb8) -__format_fake(etc2_rgb8a1) -__format_fake(etc2_srgb8a1) -__format_fake(etc2_rgba8) -__format_fake(etc2_srgba8) -__format_fake(etc2_r11_unorm) -__format_fake(etc2_r11_snorm) -__format_fake(etc2_rg11_unorm) -__format_fake(etc2_rg11_snorm) - -#endif diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py index fb42de723c4..d4bb1de4cb5 100644 --- a/src/gallium/auxiliary/util/u_format_pack.py +++ b/src/gallium/auxiliary/util/u_format_pack.py @@ -686,7 +686,7 @@ def generate_format_fetch(format, dst_channel, dst_native_type, dst_suffix): def is_format_hand_written(format): - return format.layout in ('s3tc', 'rgtc', 'etc', 'bptc', 'subsampled', 'other') or format.colorspace == ZS + return 
format.layout in ('s3tc', 'rgtc', 'etc', 'bptc', 'astc', 'subsampled', 'other') or format.colorspace == ZS def generate(formats): diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py index aceb0caf7e1..879d10ff01d 100755 --- a/src/gallium/auxiliary/util/u_format_table.py +++ b/src/gallium/auxiliary/util/u_format_table.py @@ -90,7 +90,6 @@ def write_format_table(formats): print '#include "u_format_rgtc.h"' print '#include "u_format_latc.h"' print '#include "u_format_etc.h"' - print '#include "u_format_fake.h"' print u_format_pack.generate(formats) @@ -139,10 +138,15 @@ def write_format_table(formats): u_format_pack.print_channels(format, do_channel_array) u_format_pack.print_channels(format, do_swizzle_array) print " %s," % (colorspace_map(format.colorspace),) - if format.colorspace != ZS and not format.is_pure_color(): + access = True + if format.layout in ('bptc', 'astc'): + access = False + if format.layout == 'etc' and format.short_name() != 'etc1_rgb8': + access = False + if format.colorspace != ZS and not format.is_pure_color() and access: print " &util_format_%s_unpack_rgba_8unorm," % format.short_name() print " &util_format_%s_pack_rgba_8unorm," % format.short_name() - if format.layout == 's3tc' or format.layout == 'rgtc' or format.layout == 'bptc': + if format.layout == 's3tc' or format.layout == 'rgtc': print " &util_format_%s_fetch_rgba_8unorm," % format.short_name() else: print " NULL, /* fetch_rgba_8unorm */" diff --git a/src/gallium/auxiliary/vl/vl_winsys.h b/src/gallium/auxiliary/vl/vl_winsys.h index df01917466f..1af7653d650 100644 --- a/src/gallium/auxiliary/vl/vl_winsys.h +++ b/src/gallium/auxiliary/vl/vl_winsys.h @@ -42,34 +42,31 @@ struct pipe_loader_device; struct vl_screen { - struct pipe_screen *pscreen; - struct pipe_loader_device *dev; -}; + void (*destroy)(struct vl_screen *vscreen); -struct vl_screen* -vl_screen_create(Display *display, int screen); + struct pipe_resource * + 
(*texture_from_drawable)(struct vl_screen *vscreen, void *drawable); -void vl_screen_destroy(struct vl_screen *vscreen); + struct u_rect * + (*get_dirty_area)(struct vl_screen *vscreen); -struct pipe_resource* -vl_screen_texture_from_drawable(struct vl_screen *vscreen, Drawable drawable); + uint64_t + (*get_timestamp)(struct vl_screen *vscreen, void *drawable); -struct u_rect * -vl_screen_get_dirty_area(struct vl_screen *vscreen); + void + (*set_next_timestamp)(struct vl_screen *vscreen, uint64_t stamp); -uint64_t -vl_screen_get_timestamp(struct vl_screen *vscreen, Drawable drawable); + void * + (*get_private)(struct vl_screen *vscreen); -void -vl_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp); + struct pipe_screen *pscreen; + struct pipe_loader_device *dev; +}; -void* -vl_screen_get_private(struct vl_screen *vscreen); +struct vl_screen * +vl_dri2_screen_create(Display *display, int screen); -struct vl_screen* +struct vl_screen * vl_drm_screen_create(int fd); -void -vl_drm_screen_destroy(struct vl_screen *vscreen); - #endif diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c b/src/gallium/auxiliary/vl/vl_winsys_dri.c index 3b1b87f9523..ae0d4cdee1b 100644 --- a/src/gallium/auxiliary/vl/vl_winsys_dri.c +++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c @@ -73,24 +73,27 @@ struct vl_dri_screen int64_t last_ust, ns_frame, last_msc, next_msc; }; -static const unsigned int attachments[1] = { XCB_DRI2_ATTACHMENT_BUFFER_BACK_LEFT }; +static const unsigned attachments[1] = { XCB_DRI2_ATTACHMENT_BUFFER_BACK_LEFT }; + +static void vl_dri2_screen_destroy(struct vl_screen *vscreen); static void -vl_dri2_handle_stamps(struct vl_dri_screen* scrn, +vl_dri2_handle_stamps(struct vl_dri_screen *scrn, uint32_t ust_hi, uint32_t ust_lo, uint32_t msc_hi, uint32_t msc_lo) { int64_t ust = ((((uint64_t)ust_hi) << 32) | ust_lo) * 1000; int64_t msc = (((uint64_t)msc_hi) << 32) | msc_lo; - if (scrn->last_ust && scrn->last_msc && (ust > scrn->last_ust) && (msc > 
scrn->last_msc)) + if (scrn->last_ust && (ust > scrn->last_ust) && + scrn->last_msc && (msc > scrn->last_msc)) scrn->ns_frame = (ust - scrn->last_ust) / (msc - scrn->last_msc); scrn->last_ust = ust; scrn->last_msc = msc; } -static xcb_dri2_get_buffers_reply_t* +static xcb_dri2_get_buffers_reply_t * vl_dri2_get_flush_reply(struct vl_dri_screen *scrn) { xcb_dri2_wait_sbc_reply_t *wait_sbc_reply; @@ -120,7 +123,7 @@ vl_dri2_flush_frontbuffer(struct pipe_screen *screen, unsigned level, unsigned layer, void *context_private, struct pipe_box *sub_box) { - struct vl_dri_screen *scrn = (struct vl_dri_screen*)context_private; + struct vl_dri_screen *scrn = (struct vl_dri_screen *)context_private; uint32_t msc_hi, msc_lo; assert(screen); @@ -132,9 +135,11 @@ vl_dri2_flush_frontbuffer(struct pipe_screen *screen, msc_hi = scrn->next_msc >> 32; msc_lo = scrn->next_msc & 0xFFFFFFFF; - scrn->swap_cookie = xcb_dri2_swap_buffers_unchecked(scrn->conn, scrn->drawable, msc_hi, msc_lo, 0, 0, 0, 0); + scrn->swap_cookie = xcb_dri2_swap_buffers_unchecked(scrn->conn, scrn->drawable, + msc_hi, msc_lo, 0, 0, 0, 0); scrn->wait_cookie = xcb_dri2_wait_sbc_unchecked(scrn->conn, scrn->drawable, 0, 0); - scrn->buffers_cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, scrn->drawable, 1, 1, attachments); + scrn->buffers_cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, scrn->drawable, + 1, 1, attachments); scrn->flushed = true; scrn->current_buffer = !scrn->current_buffer; @@ -170,10 +175,10 @@ vl_dri2_set_drawable(struct vl_dri_screen *scrn, Drawable drawable) scrn->drawable = drawable; } -struct pipe_resource* -vl_screen_texture_from_drawable(struct vl_screen *vscreen, Drawable drawable) +static struct pipe_resource * +vl_dri2_screen_texture_from_drawable(struct vl_screen *vscreen, void *drawable) { - struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen; + struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen; struct winsys_handle dri2_handle; struct pipe_resource template, *tex; 
@@ -185,11 +190,12 @@ vl_screen_texture_from_drawable(struct vl_screen *vscreen, Drawable drawable) assert(scrn); - vl_dri2_set_drawable(scrn, drawable); + vl_dri2_set_drawable(scrn, (Drawable)drawable); reply = vl_dri2_get_flush_reply(scrn); if (!reply) { xcb_dri2_get_buffers_cookie_t cookie; - cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, drawable, 1, 1, attachments); + cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, (Drawable)drawable, + 1, 1, attachments); reply = xcb_dri2_get_buffers_reply(scrn->conn, cookie, NULL); } if (!reply) @@ -241,32 +247,33 @@ vl_screen_texture_from_drawable(struct vl_screen *vscreen, Drawable drawable) template.bind = PIPE_BIND_RENDER_TARGET; template.flags = 0; - tex = scrn->base.pscreen->resource_from_handle(scrn->base.pscreen, &template, &dri2_handle); + tex = scrn->base.pscreen->resource_from_handle(scrn->base.pscreen, &template, + &dri2_handle); free(reply); return tex; } -struct u_rect * -vl_screen_get_dirty_area(struct vl_screen *vscreen) +static struct u_rect * +vl_dri2_screen_get_dirty_area(struct vl_screen *vscreen) { - struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen; + struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen; assert(scrn); return &scrn->dirty_areas[scrn->current_buffer]; } -uint64_t -vl_screen_get_timestamp(struct vl_screen *vscreen, Drawable drawable) +static uint64_t +vl_dri2_screen_get_timestamp(struct vl_screen *vscreen, void *drawable) { - struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen; + struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen; xcb_dri2_get_msc_cookie_t cookie; xcb_dri2_get_msc_reply_t *reply; assert(scrn); - vl_dri2_set_drawable(scrn, drawable); + vl_dri2_set_drawable(scrn, (Drawable)drawable); if (!scrn->last_ust) { - cookie = xcb_dri2_get_msc_unchecked(scrn->conn, drawable); + cookie = xcb_dri2_get_msc_unchecked(scrn->conn, (Drawable)drawable); reply = xcb_dri2_get_msc_reply(scrn->conn, cookie, NULL); if (reply) { @@ -278,19 +285,20 @@ 
vl_screen_get_timestamp(struct vl_screen *vscreen, Drawable drawable) return scrn->last_ust; } -void -vl_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp) +static void +vl_dri2_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp) { - struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen; + struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen; assert(scrn); if (stamp && scrn->last_ust && scrn->ns_frame && scrn->last_msc) - scrn->next_msc = ((int64_t)stamp - scrn->last_ust + scrn->ns_frame/2) / scrn->ns_frame + scrn->last_msc; + scrn->next_msc = ((int64_t)stamp - scrn->last_ust + scrn->ns_frame/2) / + scrn->ns_frame + scrn->last_msc; else scrn->next_msc = 0; } -void* -vl_screen_get_private(struct vl_screen *vscreen) +static void * +vl_dri2_screen_get_private(struct vl_screen *vscreen) { return vscreen; } @@ -305,8 +313,8 @@ get_xcb_screen(xcb_screen_iterator_t iter, int screen) return NULL; } -struct vl_screen* -vl_screen_create(Display *display, int screen) +struct vl_screen * +vl_dri2_screen_create(Display *display, int screen) { struct vl_dri_screen *scrn; const xcb_query_extension_reply_t *extension; @@ -320,7 +328,7 @@ vl_screen_create(Display *display, int screen) xcb_generic_error_t *error = NULL; char *device_name; int fd, device_name_length; - unsigned int driverType; + unsigned driverType; drm_magic_t magic; @@ -340,7 +348,9 @@ vl_screen_create(Display *display, int screen) if (!(extension && extension->present)) goto free_screen; - dri2_query_cookie = xcb_dri2_query_version (scrn->conn, XCB_DRI2_MAJOR_VERSION, XCB_DRI2_MINOR_VERSION); + dri2_query_cookie = xcb_dri2_query_version (scrn->conn, + XCB_DRI2_MAJOR_VERSION, + XCB_DRI2_MINOR_VERSION); dri2_query = xcb_dri2_query_version_reply (scrn->conn, dri2_query_cookie, &error); if (dri2_query == NULL || error != NULL || dri2_query->minor_version < 2) goto free_query; @@ -352,7 +362,7 @@ vl_screen_create(Display *display, int screen) { char *prime = 
getenv("DRI_PRIME"); if (prime) { - unsigned int primeid; + unsigned primeid; errno = 0; primeid = strtoul(prime, NULL, 0); if (errno == 0) @@ -362,9 +372,12 @@ vl_screen_create(Display *display, int screen) } #endif - connect_cookie = xcb_dri2_connect_unchecked(scrn->conn, get_xcb_screen(s, screen)->root, driverType); + connect_cookie = xcb_dri2_connect_unchecked(scrn->conn, + get_xcb_screen(s, screen)->root, + driverType); connect = xcb_dri2_connect_reply(scrn->conn, connect_cookie, NULL); - if (connect == NULL || connect->driver_name_length + connect->device_name_length == 0) + if (connect == NULL || + connect->driver_name_length + connect->device_name_length == 0) goto free_connect; device_name_length = xcb_dri2_connect_device_name_length(connect); @@ -381,22 +394,26 @@ vl_screen_create(Display *display, int screen) if (drmGetMagic(fd, &magic)) goto free_connect; - authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn, get_xcb_screen(s, screen)->root, magic); + authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn, + get_xcb_screen(s, screen)->root, + magic); authenticate = xcb_dri2_authenticate_reply(scrn->conn, authenticate_cookie, NULL); if (authenticate == NULL || !authenticate->authenticated) goto free_authenticate; -#if GALLIUM_STATIC_TARGETS - scrn->base.pscreen = dd_create_screen(fd); -#else - if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd)) - scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev, PIPE_SEARCH_DIR); -#endif // GALLIUM_STATIC_TARGETS + if (pipe_loader_drm_probe_fd(&scrn->base.dev, dup(fd))) + scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev); if (!scrn->base.pscreen) goto release_pipe; + scrn->base.destroy = vl_dri2_screen_destroy; + scrn->base.texture_from_drawable = vl_dri2_screen_texture_from_drawable; + scrn->base.get_dirty_area = vl_dri2_screen_get_dirty_area; + scrn->base.get_timestamp = vl_dri2_screen_get_timestamp; + scrn->base.set_next_timestamp = vl_dri2_screen_set_next_timestamp; + 
scrn->base.get_private = vl_dri2_screen_get_private; scrn->base.pscreen->flush_frontbuffer = vl_dri2_flush_frontbuffer; vl_compositor_reset_dirty_area(&scrn->dirty_areas[0]); vl_compositor_reset_dirty_area(&scrn->dirty_areas[1]); @@ -409,10 +426,8 @@ vl_screen_create(Display *display, int screen) return &scrn->base; release_pipe: -#if !GALLIUM_STATIC_TARGETS if (scrn->base.dev) pipe_loader_release(&scrn->base.dev, 1); -#endif // !GALLIUM_STATIC_TARGETS free_authenticate: free(authenticate); free_connect: @@ -426,9 +441,10 @@ free_screen: return NULL; } -void vl_screen_destroy(struct vl_screen *vscreen) +static void +vl_dri2_screen_destroy(struct vl_screen *vscreen) { - struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen; + struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen; assert(vscreen); @@ -440,8 +456,6 @@ void vl_screen_destroy(struct vl_screen *vscreen) vl_dri2_destroy_drawable(scrn); scrn->base.pscreen->destroy(scrn->base.pscreen); -#if !GALLIUM_STATIC_TARGETS pipe_loader_release(&scrn->base.dev, 1); -#endif // !GALLIUM_STATIC_TARGETS FREE(scrn); } diff --git a/src/gallium/auxiliary/vl/vl_winsys_drm.c b/src/gallium/auxiliary/vl/vl_winsys_drm.c index 1167fcf6a90..f993e2c7727 100644 --- a/src/gallium/auxiliary/vl/vl_winsys_drm.c +++ b/src/gallium/auxiliary/vl/vl_winsys_drm.c @@ -34,7 +34,10 @@ #include "util/u_memory.h" #include "vl/vl_winsys.h" -struct vl_screen* +static void +vl_drm_screen_destroy(struct vl_screen *vscreen); + +struct vl_screen * vl_drm_screen_create(int fd) { struct vl_screen *vscreen; @@ -43,35 +46,34 @@ vl_drm_screen_create(int fd) if (!vscreen) return NULL; -#if GALLIUM_STATIC_TARGETS - vscreen->pscreen = dd_create_screen(fd); -#else - if (pipe_loader_drm_probe_fd(&vscreen->dev, dup(fd))) { - vscreen->pscreen = - pipe_loader_create_screen(vscreen->dev, PIPE_SEARCH_DIR); - if (!vscreen->pscreen) - pipe_loader_release(&vscreen->dev, 1); - } -#endif + if (pipe_loader_drm_probe_fd(&vscreen->dev, dup(fd))) + vscreen->pscreen 
= pipe_loader_create_screen(vscreen->dev); - if (!vscreen->pscreen) { - FREE(vscreen); - return NULL; - } + if (!vscreen->pscreen) + goto error; + vscreen->destroy = vl_drm_screen_destroy; + vscreen->texture_from_drawable = NULL; + vscreen->get_dirty_area = NULL; + vscreen->get_timestamp = NULL; + vscreen->set_next_timestamp = NULL; + vscreen->get_private = NULL; return vscreen; + +error: + if (vscreen->dev) + pipe_loader_release(&vscreen->dev, 1); + + FREE(vscreen); + return NULL; } -void +static void vl_drm_screen_destroy(struct vl_screen *vscreen) { assert(vscreen); vscreen->pscreen->destroy(vscreen->pscreen); - -#if !GALLIUM_STATIC_TARGETS pipe_loader_release(&vscreen->dev, 1); -#endif - FREE(vscreen); } diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h index ef235734755..77f708f449c 100644 --- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h +++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: diff --git 
a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h index b5e1ddadde0..a6940dfefea 100644 --- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: @@ -111,10 +111,14 @@ enum a3xx_vtx_fmt { VFMT_8_8_SNORM = 53, VFMT_8_8_8_SNORM = 54, VFMT_8_8_8_8_SNORM = 55, - VFMT_10_10_10_2_UINT = 60, - VFMT_10_10_10_2_UNORM = 61, - VFMT_10_10_10_2_SINT = 62, - VFMT_10_10_10_2_SNORM = 63, + VFMT_10_10_10_2_UINT = 56, + VFMT_10_10_10_2_UNORM = 57, + VFMT_10_10_10_2_SINT = 58, + VFMT_10_10_10_2_SNORM = 59, + VFMT_2_10_10_10_UINT = 60, + VFMT_2_10_10_10_UNORM = 61, + VFMT_2_10_10_10_SINT = 62, + VFMT_2_10_10_10_SNORM = 63, }; enum a3xx_tex_fmt { @@ -138,10 +142,12 @@ enum a3xx_tex_fmt { TFMT_DXT1 = 36, TFMT_DXT3 = 37, TFMT_DXT5 = 38, + TFMT_2_10_10_10_UNORM = 40, TFMT_10_10_10_2_UNORM = 41, TFMT_9_9_9_E5_FLOAT = 42, TFMT_11_11_10_FLOAT = 43, TFMT_A8_UNORM = 44, + TFMT_L8_UNORM = 45, TFMT_L8_A8_UNORM = 47, TFMT_8_UNORM = 48, 
TFMT_8_8_UNORM = 49, @@ -183,6 +189,8 @@ enum a3xx_tex_fmt { TFMT_32_SINT = 92, TFMT_32_32_SINT = 93, TFMT_32_32_32_32_SINT = 95, + TFMT_2_10_10_10_UINT = 96, + TFMT_10_10_10_2_UINT = 97, TFMT_ETC2_RG11_SNORM = 112, TFMT_ETC2_RG11_UNORM = 113, TFMT_ETC2_R11_SNORM = 114, @@ -215,6 +223,9 @@ enum a3xx_color_fmt { RB_R8_UINT = 14, RB_R8_SINT = 15, RB_R10G10B10A2_UNORM = 16, + RB_A2R10G10B10_UNORM = 17, + RB_R10G10B10A2_UINT = 18, + RB_A2R10G10B10_UINT = 19, RB_A8_UNORM = 20, RB_R8_UNORM = 21, RB_R16_FLOAT = 24, @@ -251,25 +262,6 @@ enum a3xx_sp_perfcounter_select { SP_ALU_ACTIVE_CYCLES = 29, }; -enum a3xx_rop_code { - ROP_CLEAR = 0, - ROP_NOR = 1, - ROP_AND_INVERTED = 2, - ROP_COPY_INVERTED = 3, - ROP_AND_REVERSE = 4, - ROP_INVERT = 5, - ROP_XOR = 6, - ROP_NAND = 7, - ROP_AND = 8, - ROP_EQUIV = 9, - ROP_NOOP = 10, - ROP_OR_INVERTED = 11, - ROP_COPY = 12, - ROP_OR_REVERSE = 13, - ROP_OR = 14, - ROP_SET = 15, -}; - enum a3xx_rb_blend_opcode { BLEND_DST_PLUS_SRC = 0, BLEND_SRC_MINUS_DST = 1, @@ -1620,12 +1612,24 @@ static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val) } #define REG_A3XX_VFD_CONTROL_1 0x00002241 -#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000ffff +#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000000f #define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0 static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val) { return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK; } +#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK 0x000000f0 +#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT 4 +static inline uint32_t A3XX_VFD_CONTROL_1_MAXTHRESHOLD(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK; +} +#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK 0x00000f00 +#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT 8 +static inline uint32_t A3XX_VFD_CONTROL_1_MINTHRESHOLD(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT) & 
A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK; +} #define A3XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000 #define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16 static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index 3906c9b996e..b8a31d84b3f 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -81,7 +81,9 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ info->restart_index : 0xffffffff); + /* points + psize -> spritelist: */ if (ctx->rasterizer->point_size_per_vertex && + fd3_emit_get_vp(emit)->writes_psize && (info->mode == PIPE_PRIM_POINTS)) primtype = DI_PT_POINTLIST_PSIZE; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 8f9c8b0623c..24afbc9e956 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -209,13 +209,19 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, fd3_pipe_sampler_view(tex->textures[i]) : &dummy_view; struct fd_resource *rsc = fd_resource(view->base.texture); - unsigned start = fd_sampler_first_level(&view->base); - unsigned end = fd_sampler_last_level(&view->base);; + if (rsc && rsc->base.b.target == PIPE_BUFFER) { + OUT_RELOC(ring, rsc->bo, view->base.u.buf.first_element * + util_format_get_blocksize(view->base.format), 0, 0); + j = 1; + } else { + unsigned start = fd_sampler_first_level(&view->base); + unsigned end = fd_sampler_last_level(&view->base);; - for (j = 0; j < (end - start + 1); j++) { - struct fd_resource_slice *slice = + for (j = 0; j < (end - start + 1); j++) { + struct fd_resource_slice *slice = fd_resource_slice(rsc, j + start); - OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0); + OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0); + } } /* pad the 
remaining entries w/ null: */ @@ -350,7 +356,10 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) unsigned instance_regid = regid(63, 0); unsigned vtxcnt_regid = regid(63, 0); + /* Note that sysvals come *after* normal inputs: */ for (i = 0; i < vp->inputs_count; i++) { + if (!vp->inputs[i].compmask) + continue; if (vp->inputs[i].sysval) { switch(vp->inputs[i].slot) { case SYSTEM_VALUE_BASE_VERTEX: @@ -369,18 +378,11 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) unreachable("invalid system value"); break; } - } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) { + } else if (i < vtx->vtx->num_elements) { last = i; } } - /* hw doesn't like to be configured for zero vbo's, it seems: */ - if ((vtx->vtx->num_elements == 0) && - (vertex_regid == regid(63, 0)) && - (instance_regid == regid(63, 0)) && - (vtxcnt_regid == regid(63, 0))) - return; - for (i = 0, j = 0; i <= last; i++) { assert(!vp->inputs[i].sysval); if (vp->inputs[i].compmask) { @@ -424,6 +426,38 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) } } + /* hw doesn't like to be configured for zero vbo's, it seems: */ + if (last < 0) { + /* just recycle the shader bo, we just need to point to *something* + * valid: + */ + struct fd_bo *dummy_vbo = vp->bo; + bool switchnext = (vertex_regid != regid(63, 0)) || + (instance_regid != regid(63, 0)) || + (vtxcnt_regid != regid(63, 0)); + + OUT_PKT0(ring, REG_A3XX_VFD_FETCH(0), 2); + OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) | + A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) | + COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) | + A3XX_VFD_FETCH_INSTR_0_INDEXCODE(0) | + A3XX_VFD_FETCH_INSTR_0_STEPRATE(1)); + OUT_RELOC(ring, dummy_vbo, 0, 0, 0); + + OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(0), 1); + OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL | + A3XX_VFD_DECODE_INSTR_WRITEMASK(0x1) | + A3XX_VFD_DECODE_INSTR_FORMAT(VFMT_8_UNORM) | + A3XX_VFD_DECODE_INSTR_SWAP(XYZW) | + 
A3XX_VFD_DECODE_INSTR_REGID(regid(0,0)) | + A3XX_VFD_DECODE_INSTR_SHIFTCNT(1) | + A3XX_VFD_DECODE_INSTR_LASTCOMPVALID | + COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT)); + + total_in = 1; + j = 1; + } + OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2); OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) | A3XX_VFD_CONTROL_0_PACKETSIZE(2) | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c index 857d156c869..52ea9444517 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c @@ -188,9 +188,13 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), _T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX), + V_(B10G10R10A2_SNORM, 10_10_10_2_SNORM, NONE, WXYZ), V_(R10G10B10A2_UINT, 10_10_10_2_UINT, NONE, WZYX), + V_(B10G10R10A2_UINT, 10_10_10_2_UINT, NONE, WXYZ), V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX), + V_(B10G10R10A2_USCALED, 10_10_10_2_UINT, NONE, WXYZ), V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX), + V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT, NONE, WXYZ), _T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), _T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX), @@ -271,6 +275,16 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { _T(DXT3_SRGBA, DXT3, NONE, WZYX), _T(DXT5_RGBA, DXT5, NONE, WZYX), _T(DXT5_SRGBA, DXT5, NONE, WZYX), + + /* faked */ + _T(RGTC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(RGTC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX), + _T(RGTC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(RGTC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX), + _T(LATC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(LATC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX), + _T(LATC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(LATC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX), }; enum a3xx_vtx_fmt @@ -310,6 +324,8 @@ fd3_pipe2fetchsize(enum pipe_format format) 
{ if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) format = PIPE_FORMAT_Z32_FLOAT; + else if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) + format = PIPE_FORMAT_R8G8B8A8_UNORM; switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) { case 8: return TFETCH_1_BYTE; case 16: return TFETCH_2_BYTE; @@ -324,6 +340,14 @@ fd3_pipe2fetchsize(enum pipe_format format) } } +unsigned +fd3_pipe2nblocksx(enum pipe_format format, unsigned width) +{ + if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) + format = PIPE_FORMAT_R8G8B8A8_UNORM; + return util_format_get_nblocksx(format, width); +} + /* we need to special case a bit the depth/stencil restore, because we are * using the texture sampler to blit into the depth/stencil buffer, *not* * into a color buffer. Otherwise fd3_tex_swiz() will do the wrong thing, diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/src/gallium/drivers/freedreno/a3xx/fd3_format.h index 05c5ea3d247..48c503e9a82 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.h @@ -37,6 +37,7 @@ enum a3xx_color_fmt fd3_pipe2color(enum pipe_format format); enum pipe_format fd3_gmem_restore_format(enum pipe_format format); enum a3xx_color_fmt fd3_fs_output_format(enum pipe_format format); enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format); +unsigned fd3_pipe2nblocksx(enum pipe_format format, unsigned width); uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c index 2d6ecb2c050..99ae99ea0c1 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c @@ -211,8 +211,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, { struct fd3_pipe_sampler_view 
*so = CALLOC_STRUCT(fd3_pipe_sampler_view); struct fd_resource *rsc = fd_resource(prsc); - unsigned lvl = fd_sampler_first_level(cso); - unsigned miplevels = fd_sampler_last_level(cso) - lvl; + unsigned lvl; uint32_t sz2 = 0; if (!so) @@ -227,20 +226,34 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, so->texconst0 = A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) | A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) | - A3XX_TEX_CONST_0_MIPLVLS(miplevels) | fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); if (util_format_is_srgb(cso->format)) so->texconst0 |= A3XX_TEX_CONST_0_SRGB; - so->texconst1 = + if (prsc->target == PIPE_BUFFER) { + lvl = 0; + so->texconst1 = + A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) | + A3XX_TEX_CONST_1_WIDTH(cso->u.buf.last_element - + cso->u.buf.first_element + 1) | + A3XX_TEX_CONST_1_HEIGHT(1); + } else { + unsigned miplevels; + + lvl = fd_sampler_first_level(cso); + miplevels = fd_sampler_last_level(cso) - lvl; + + so->texconst0 |= A3XX_TEX_CONST_0_MIPLVLS(miplevels); + so->texconst1 = A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) | A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); + } /* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */ so->texconst2 = - A3XX_TEX_CONST_2_PITCH(util_format_get_nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp); + A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp); switch (prsc->target) { case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_2D_ARRAY: diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h index 9f970365464..a450379e98d 100644 --- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h +++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - 
/home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: @@ -47,11 +47,13 @@ enum a4xx_color_fmt { RB4_R8_UNORM = 2, RB4_R4G4B4A4_UNORM = 8, RB4_R5G5B5A1_UNORM = 10, - RB4_R5G6R5_UNORM = 14, + RB4_R5G6B5_UNORM = 14, RB4_R8G8_UNORM = 15, RB4_R8G8_SNORM = 16, RB4_R8G8_UINT = 17, RB4_R8G8_SINT = 18, + RB4_R16_UNORM = 19, + RB4_R16_SNORM = 20, RB4_R16_FLOAT = 21, RB4_R16_UINT = 22, RB4_R16_SINT = 23, @@ -63,12 +65,16 @@ enum a4xx_color_fmt { RB4_R10G10B10A2_UNORM = 31, RB4_R10G10B10A2_UINT = 34, RB4_R11G11B10_FLOAT = 39, + RB4_R16G16_UNORM = 40, + RB4_R16G16_SNORM = 41, RB4_R16G16_FLOAT = 42, RB4_R16G16_UINT = 43, RB4_R16G16_SINT = 44, RB4_R32_FLOAT = 45, RB4_R32_UINT = 46, RB4_R32_SINT = 47, + RB4_R16G16B16A16_UNORM = 52, + RB4_R16G16B16A16_SNORM = 53, RB4_R16G16B16A16_FLOAT = 54, RB4_R16G16B16A16_UINT = 55, RB4_R16G16B16A16_SINT = 56, @@ -106,6 +112,7 @@ enum a4xx_vtx_fmt { VFMT4_32_32_FIXED = 10, VFMT4_32_32_32_FIXED = 11, VFMT4_32_32_32_32_FIXED = 12, + VFMT4_11_11_10_FLOAT = 13, VFMT4_16_SINT = 16, VFMT4_16_16_SINT = 17, VFMT4_16_16_16_SINT = 18, @@ -146,18 +153,19 @@ enum a4xx_vtx_fmt { VFMT4_8_8_SNORM = 53, VFMT4_8_8_8_SNORM = 54, 
VFMT4_8_8_8_8_SNORM = 55, - VFMT4_10_10_10_2_UINT = 60, - VFMT4_10_10_10_2_UNORM = 61, - VFMT4_10_10_10_2_SINT = 62, - VFMT4_10_10_10_2_SNORM = 63, + VFMT4_10_10_10_2_UINT = 56, + VFMT4_10_10_10_2_UNORM = 57, + VFMT4_10_10_10_2_SINT = 58, + VFMT4_10_10_10_2_SNORM = 59, }; enum a4xx_tex_fmt { TFMT4_5_6_5_UNORM = 11, - TFMT4_5_5_5_1_UNORM = 10, + TFMT4_5_5_5_1_UNORM = 9, TFMT4_4_4_4_4_UNORM = 8, TFMT4_X8Z24_UNORM = 71, TFMT4_10_10_10_2_UNORM = 33, + TFMT4_10_10_10_2_UINT = 34, TFMT4_A8_UNORM = 3, TFMT4_L8_A8_UNORM = 13, TFMT4_8_UNORM = 4, @@ -172,6 +180,12 @@ enum a4xx_tex_fmt { TFMT4_8_SINT = 7, TFMT4_8_8_SINT = 17, TFMT4_8_8_8_8_SINT = 31, + TFMT4_16_UNORM = 18, + TFMT4_16_16_UNORM = 38, + TFMT4_16_16_16_16_UNORM = 51, + TFMT4_16_SNORM = 19, + TFMT4_16_16_SNORM = 39, + TFMT4_16_16_16_16_SNORM = 52, TFMT4_16_UINT = 21, TFMT4_16_16_UINT = 41, TFMT4_16_16_16_16_UINT = 54, @@ -190,8 +204,21 @@ enum a4xx_tex_fmt { TFMT4_32_FLOAT = 43, TFMT4_32_32_FLOAT = 56, TFMT4_32_32_32_32_FLOAT = 63, + TFMT4_32_32_32_FLOAT = 59, + TFMT4_32_32_32_UINT = 60, + TFMT4_32_32_32_SINT = 61, TFMT4_9_9_9_E5_FLOAT = 32, TFMT4_11_11_10_FLOAT = 37, + TFMT4_DXT1 = 86, + TFMT4_DXT3 = 87, + TFMT4_DXT5 = 88, + TFMT4_RGTC1_UNORM = 90, + TFMT4_RGTC1_SNORM = 91, + TFMT4_RGTC2_UNORM = 94, + TFMT4_RGTC2_SNORM = 95, + TFMT4_BPTC_UFLOAT = 97, + TFMT4_BPTC_FLOAT = 98, + TFMT4_BPTC = 99, TFMT4_ATC_RGB = 100, TFMT4_ATC_RGBA_EXPLICIT = 101, TFMT4_ATC_RGBA_INTERPOLATED = 102, @@ -400,8 +427,13 @@ static inline uint32_t REG_A4XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020a4 #define A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 #define A4XX_RB_MRT_CONTROL_BLEND 0x00000010 #define A4XX_RB_MRT_CONTROL_BLEND2 0x00000020 -#define A4XX_RB_MRT_CONTROL_FASTCLEAR 0x00000400 -#define A4XX_RB_MRT_CONTROL_B11 0x00000800 +#define A4XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000040 +#define A4XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00 +#define A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8 +static inline uint32_t 
A4XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) +{ + return ((val) << A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A4XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} #define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000 #define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24 static inline uint32_t A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) @@ -600,7 +632,7 @@ static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val) { return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK; } -#define A4XX_RB_FS_OUTPUT_FAST_CLEAR 0x00000100 +#define A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND 0x00000100 #define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK 0xffff0000 #define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT 16 static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val) @@ -2056,6 +2088,8 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val) #define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b #define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000 +#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000 +#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 #define REG_A4XX_GRAS_CLEAR_CNTL 0x00002003 #define A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR 0x00000001 @@ -2596,7 +2630,20 @@ static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t val) #define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000 #define A4XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000 -#define REG_A4XX_UNKNOWN_21C5 0x000021c5 +#define REG_A4XX_PC_PRIM_VTX_CNTL2 0x000021c5 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK 0x00000007 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT 0 +static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK; +} +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK 0x00000038 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT 3 +static inline 
uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK; +} +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE 0x00000040 #define REG_A4XX_PC_RESTART_INDEX 0x000021c6 @@ -2738,6 +2785,12 @@ static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val) { return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK; } +#define A4XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 +#define A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 +static inline uint32_t A4XX_TEX_SAMP_0_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 256.0))) << A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A4XX_TEX_SAMP_0_LOD_BIAS__MASK; +} #define REG_A4XX_TEX_SAMP_1 0x00000001 #define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e @@ -2746,6 +2799,7 @@ static inline uint32_t A4XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val { return ((val) << A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK; } +#define A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 #define A4XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 #define A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 #define A4XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 @@ -2814,7 +2868,7 @@ static inline uint32_t A4XX_TEX_CONST_1_HEIGHT(uint32_t val) { return ((val) << A4XX_TEX_CONST_1_HEIGHT__SHIFT) & A4XX_TEX_CONST_1_HEIGHT__MASK; } -#define A4XX_TEX_CONST_1_WIDTH__MASK 0x1fff8000 +#define A4XX_TEX_CONST_1_WIDTH__MASK 0x3fff8000 #define A4XX_TEX_CONST_1_WIDTH__SHIFT 15 static inline uint32_t A4XX_TEX_CONST_1_WIDTH(uint32_t val) { diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c index d5e823ef69d..f19702280e0 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c @@ -27,6 +27,7 @@ */ #include "pipe/p_state.h" +#include "util/u_blend.h" #include "util/u_string.h" #include 
"util/u_memory.h" @@ -59,12 +60,12 @@ fd4_blend_state_create(struct pipe_context *pctx, const struct pipe_blend_state *cso) { struct fd4_blend_stateobj *so; -// enum a3xx_rop_code rop = ROP_COPY; + enum a3xx_rop_code rop = ROP_COPY; bool reads_dest = false; unsigned i, mrt_blend = 0; if (cso->logicop_enable) { -// rop = cso->logicop_func; /* maps 1:1 */ + rop = cso->logicop_func; /* maps 1:1 */ switch (cso->logicop_func) { case PIPE_LOGICOP_NOR: @@ -98,16 +99,25 @@ fd4_blend_state_create(struct pipe_context *pctx, else rt = &cso->rt[0]; - so->rb_mrt[i].blend_control = + so->rb_mrt[i].blend_control_rgb = A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) | A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | - A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) | + A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)); + + so->rb_mrt[i].blend_control_alpha = A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) | A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) | A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor)); + so->rb_mrt[i].blend_control_no_alpha_rgb = + A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_src_factor))) | + A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | + A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_dst_factor))); + + so->rb_mrt[i].control = - 0xc00 | /* XXX ROP_CODE ?? 
*/ + A4XX_RB_MRT_CONTROL_ROP_CODE(rop) | + COND(cso->logicop_enable, A4XX_RB_MRT_CONTROL_ROP_ENABLE) | A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask); if (rt->blend_enable) { @@ -118,14 +128,17 @@ fd4_blend_state_create(struct pipe_context *pctx, mrt_blend |= (1 << i); } - if (reads_dest) + if (reads_dest) { so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE; + mrt_blend |= (1 << i); + } if (cso->dither) so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS); } - so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend); + so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend) | + COND(cso->independent_blend_enable, A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND); return so; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h index 7620d00a625..6230fa7a50e 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h @@ -39,7 +39,12 @@ struct fd4_blend_stateobj { struct { uint32_t control; uint32_t buf_info; - uint32_t blend_control; + /* Blend control bits for color if there is an alpha channel */ + uint32_t blend_control_rgb; + /* Blend control bits for color if there is no alpha channel */ + uint32_t blend_control_no_alpha_rgb; + /* Blend control bits for alpha channel */ + uint32_t blend_control_alpha; } rb_mrt[A4XX_MAX_RENDER_TARGETS]; uint32_t rb_fs_output; }; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index 7bd5163529a..8cbe68d5790 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -47,6 +47,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd4_emit *emit) { const struct pipe_draw_info *info = emit->info; + enum pc_di_primtype primtype = ctx->primtypes[info->mode]; if (!(fd4_emit_get_vp(emit) && fd4_emit_get_fp(emit))) return; @@ -64,7 +65,14 @@ draw_impl(struct 
fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ info->restart_index : 0xffffffff); + /* points + psize -> spritelist: */ + if (ctx->rasterizer->point_size_per_vertex && + fd4_emit_get_vp(emit)->writes_psize && + (info->mode == PIPE_PRIM_POINTS)) + primtype = DI_PT_POINTLIST_PSIZE; + fd4_draw_emit(ctx, ring, + primtype, emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, info); } @@ -263,8 +271,7 @@ fd4_clear(struct fd_context *ctx, unsigned buffers, mrt_comp[i] = (buffers & (PIPE_CLEAR_COLOR0 << i)) ? 0xf : 0x0; OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); - OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR | - A4XX_RB_MRT_CONTROL_B11 | + OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) | A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf)); OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h index b89a30a7c4b..a6c56404a8a 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h @@ -101,12 +101,12 @@ fd4_size2indextype(unsigned index_size) } static inline void fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode, const struct pipe_draw_info *info) { struct pipe_index_buffer *idx = &ctx->indexbuf; struct fd_bo *idx_bo = NULL; - enum pc_di_primtype primtype = ctx->primtypes[info->mode]; enum a4xx_index_size idx_type; enum pc_di_src_sel src_sel; uint32_t idx_size, idx_offset; @@ -127,11 +127,6 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, src_sel = DI_SRC_SEL_AUTO_INDEX; } - /* points + psize -> spritelist: */ - if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex && - (info->mode == PIPE_PRIM_POINTS)) - primtype = DI_PT_POINTLIST_PSIZE; - fd4_draw(ctx, ring, primtype, vismode, src_sel, info->count, info->instance_count, idx_type, idx_size, 
idx_offset, idx_bo); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 26b58718cd8..f220fc7ac1f 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -185,7 +185,6 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct fd4_pipe_sampler_view *view = tex->textures[i] ? fd4_pipe_sampler_view(tex->textures[i]) : &dummy_view; - unsigned start = fd_sampler_first_level(&view->base); OUT_RING(ring, view->texconst0); OUT_RING(ring, view->texconst1); @@ -193,8 +192,7 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, view->texconst3); if (view->base.texture) { struct fd_resource *rsc = fd_resource(view->base.texture); - uint32_t offset = fd_resource_offset(rsc, start, 0); - OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0); + OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0); } else { OUT_RING(ring, 0x00000000); } @@ -286,7 +284,8 @@ fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs, PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA)); OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) | A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height)); - OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp)); + OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) | + A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(format))); OUT_RING(ring, 0x00000000); OUT_RELOC(ring, rsc->bo, offset, 0, 0); OUT_RING(ring, 0x00000000); @@ -332,7 +331,10 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) unsigned instance_regid = regid(63, 0); unsigned vtxcnt_regid = regid(63, 0); + /* Note that sysvals come *after* normal inputs: */ for (i = 0; i < vp->inputs_count; i++) { + if (!vp->inputs[i].compmask) + continue; if (vp->inputs[i].sysval) { switch(vp->inputs[i].slot) { case SYSTEM_VALUE_BASE_VERTEX: @@ -351,19 +353,11 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer 
*ring, struct fd4_emit *emit) unreachable("invalid system value"); break; } - } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) { + } else if (i < vtx->vtx->num_elements) { last = i; } } - - /* hw doesn't like to be configured for zero vbo's, it seems: */ - if ((vtx->vtx->num_elements == 0) && - (vertex_regid == regid(63, 0)) && - (instance_regid == regid(63, 0)) && - (vtxcnt_regid == regid(63, 0))) - return; - for (i = 0, j = 0; i <= last; i++) { assert(!vp->inputs[i].sysval); if (vp->inputs[i].compmask) { @@ -408,6 +402,38 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) } } + /* hw doesn't like to be configured for zero vbo's, it seems: */ + if (last < 0) { + /* just recycle the shader bo, we just need to point to *something* + * valid: + */ + struct fd_bo *dummy_vbo = vp->bo; + bool switchnext = (vertex_regid != regid(63, 0)) || + (instance_regid != regid(63, 0)) || + (vtxcnt_regid != regid(63, 0)); + + OUT_PKT0(ring, REG_A4XX_VFD_FETCH(0), 4); + OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) | + A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) | + COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT)); + OUT_RELOC(ring, dummy_vbo, 0, 0, 0); + OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(1)); + OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(1)); + + OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(0), 1); + OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL | + A4XX_VFD_DECODE_INSTR_WRITEMASK(0x1) | + A4XX_VFD_DECODE_INSTR_FORMAT(VFMT4_8_UNORM) | + A4XX_VFD_DECODE_INSTR_SWAP(XYZW) | + A4XX_VFD_DECODE_INSTR_REGID(regid(0,0)) | + A4XX_VFD_DECODE_INSTR_SHIFTCNT(1) | + A4XX_VFD_DECODE_INSTR_LASTCOMPVALID | + COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT)); + + total_in = 1; + j = 1; + } + OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5); OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) | 0xa0000 | /* XXX */ @@ -470,11 +496,16 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RINGP(ring, val, 
&fd4_context(ctx)->rbrc_patches); } - if (dirty & FD_DIRTY_ZSA) { + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) { struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa); + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + uint32_t rb_alpha_control = zsa->rb_alpha_control; + + if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0]))) + rb_alpha_control &= ~A4XX_RB_ALPHA_CONTROL_ALPHA_TEST; OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1); - OUT_RING(ring, zsa->rb_alpha_control); + OUT_RING(ring, rb_alpha_control); OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2); OUT_RING(ring, zsa->rb_stencil_control); @@ -535,8 +566,9 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, */ if (emit->info) { const struct pipe_draw_info *info = emit->info; - uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer) - ->pc_prim_vtx_cntl; + struct fd4_rasterizer_stateobj *rast = + fd4_rasterizer_stateobj(ctx->rasterizer); + uint32_t val = rast->pc_prim_vtx_cntl; if (info->indexed && info->primitive_restart) val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART; @@ -552,7 +584,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2); OUT_RING(ring, val); - OUT_RING(ring, 0x12); /* XXX UNKNOWN_21C5 */ + OUT_RING(ring, rast->pc_prim_vtx_cntl2); } if (dirty & FD_DIRTY_SCISSOR) { @@ -581,7 +613,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2])); } - if (dirty & FD_DIRTY_PROG) { + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) { struct pipe_framebuffer_state *pfb = &ctx->framebuffer; fd4_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs); } @@ -599,11 +631,30 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, uint32_t i; for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) { + enum pipe_format format = pipe_surface_format( + ctx->framebuffer.cbufs[i]); + bool is_int = 
util_format_is_pure_integer(format); + bool has_alpha = util_format_has_alpha(format); + uint32_t control = blend->rb_mrt[i].control; + uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha; + + if (is_int) { + control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; + control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY); + } + + if (has_alpha) { + blend_control |= blend->rb_mrt[i].blend_control_rgb; + } else { + blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb; + control &= ~A4XX_RB_MRT_CONTROL_BLEND2; + } + OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); - OUT_RING(ring, blend->rb_mrt[i].control); + OUT_RING(ring, control); OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); - OUT_RING(ring, blend->rb_mrt[i].blend_control); + OUT_RING(ring, blend_control); } OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1); @@ -611,19 +662,48 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff)); } - if (dirty & FD_DIRTY_BLEND_COLOR) { + if (dirty & (FD_DIRTY_BLEND_COLOR | FD_DIRTY_FRAMEBUFFER)) { struct pipe_blend_color *bcolor = &ctx->blend_color; + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + float factor = 65535.0; + int i; + + for (i = 0; i < pfb->nr_cbufs; i++) { + enum pipe_format format = pipe_surface_format(pfb->cbufs[i]); + const struct util_format_description *desc = + util_format_description(format); + int j; + + if (desc->is_mixed) + continue; + + j = util_format_get_first_non_void_channel(format); + if (j == -1) + continue; + + if (desc->channel[j].size > 8 || !desc->channel[j].normalized || + desc->channel[j].pure_integer) + continue; + + /* Just use the first unorm8/snorm8 render buffer. Can't keep + * everyone happy. 
+ */ + if (desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED) + factor = 32767.0; + break; + } + OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8); - OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 65535.0) | + OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * factor) | A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0])); OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0])); - OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 65535.0) | + OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * factor) | A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1])); OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1])); - OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 65535.0) | + OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * factor) | A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2])); OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2])); - OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 65535.0) | + OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * factor) | A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3])); OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3])); } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c index 847d4fb6d63..c240745cec1 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c @@ -99,20 +99,26 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { _T(S8_UINT, 8_UINT, R8_UNORM, WZYX), /* 16-bit */ - V_(R16_UNORM, 16_UNORM, NONE, WZYX), - V_(R16_SNORM, 16_SNORM, NONE, WZYX), - VT(R16_UINT, 16_UINT, R16_UINT, WZYX), - VT(R16_SINT, 16_SINT, R16_SINT, WZYX), - V_(R16_USCALED, 16_UINT, NONE, WZYX), - V_(R16_SSCALED, 16_UINT, NONE, WZYX), - VT(R16_FLOAT, 16_FLOAT, R16_FLOAT,WZYX), - - _T(A16_UINT, 16_UINT, NONE, WZYX), - _T(A16_SINT, 16_SINT, NONE, WZYX), - _T(L16_UINT, 16_UINT, NONE, WZYX), - _T(L16_SINT, 16_SINT, NONE, WZYX), - _T(I16_UINT, 16_UINT, NONE, WZYX), - _T(I16_SINT, 16_SINT, 
NONE, WZYX), + VT(R16_UNORM, 16_UNORM, R16_UNORM, WZYX), + VT(R16_SNORM, 16_SNORM, R16_SNORM, WZYX), + VT(R16_UINT, 16_UINT, R16_UINT, WZYX), + VT(R16_SINT, 16_SINT, R16_SINT, WZYX), + V_(R16_USCALED, 16_UINT, NONE, WZYX), + V_(R16_SSCALED, 16_UINT, NONE, WZYX), + VT(R16_FLOAT, 16_FLOAT, R16_FLOAT, WZYX), + + _T(A16_UNORM, 16_UNORM, NONE, WZYX), + _T(A16_SNORM, 16_SNORM, NONE, WZYX), + _T(A16_UINT, 16_UINT, NONE, WZYX), + _T(A16_SINT, 16_SINT, NONE, WZYX), + _T(L16_UNORM, 16_UNORM, NONE, WZYX), + _T(L16_SNORM, 16_SNORM, NONE, WZYX), + _T(L16_UINT, 16_UINT, NONE, WZYX), + _T(L16_SINT, 16_SINT, NONE, WZYX), + _T(I16_UNORM, 16_UNORM, NONE, WZYX), + _T(I16_SNORM, 16_SNORM, NONE, WZYX), + _T(I16_UINT, 16_UINT, NONE, WZYX), + _T(I16_SINT, 16_SINT, NONE, WZYX), VT(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX), VT(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX), @@ -124,6 +130,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { _T(L8A8_UINT, 8_8_UINT, NONE, WZYX), _T(L8A8_SINT, 8_8_SINT, NONE, WZYX), + _T(B5G6R5_UNORM, 5_6_5_UNORM, R5G6B5_UNORM, WXYZ), _T(B5G5R5A1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), _T(B5G5R5X1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), _T(B4G4R4A4_UNORM, 4_4_4_4_UNORM, R4G4B4A4_UNORM, WXYZ), @@ -151,16 +158,18 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { _T(I32_UINT, 32_UINT, NONE, WZYX), _T(I32_SINT, 32_SINT, NONE, WZYX), - V_(R16G16_UNORM, 16_16_UNORM, NONE, WZYX), - V_(R16G16_SNORM, 16_16_SNORM, NONE, WZYX), - VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX), - VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX), - V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX), - V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX), - VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT,WZYX), + VT(R16G16_UNORM, 16_16_UNORM, R16G16_UNORM, WZYX), + VT(R16G16_SNORM, 16_16_SNORM, R16G16_SNORM, WZYX), + VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX), + VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX), + V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX), + V_(R16G16_SSCALED, 16_16_SINT, 
NONE, WZYX), + VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT, WZYX), - _T(L16A16_UINT, 16_16_UINT, NONE, WZYX), - _T(L16A16_SINT, 16_16_SINT, NONE, WZYX), + _T(L16A16_UNORM, 16_16_UNORM, NONE, WZYX), + _T(L16A16_SNORM, 16_16_SNORM, NONE, WZYX), + _T(L16A16_UINT, 16_16_UINT, NONE, WZYX), + _T(L16A16_SINT, 16_16_SINT, NONE, WZYX), VT(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), _T(R8G8B8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), @@ -191,11 +200,15 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), _T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX), - V_(R10G10B10A2_UINT, 10_10_10_2_UINT, NONE, WZYX), + V_(B10G10R10A2_SNORM, 10_10_10_2_SNORM, NONE, WXYZ), + VT(R10G10B10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WZYX), + VT(B10G10R10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WXYZ), V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX), + V_(B10G10R10A2_USCALED, 10_10_10_2_UINT, NONE, WXYZ), V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX), + V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT, NONE, WXYZ), - _T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), + VT(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), _T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX), _T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), @@ -213,8 +226,10 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { V_(R16G16B16_FLOAT, 16_16_16_FLOAT, NONE, WZYX), /* 64-bit */ - V_(R16G16B16A16_UNORM, 16_16_16_16_UNORM, NONE, WZYX), - V_(R16G16B16A16_SNORM, 16_16_16_16_SNORM, NONE, WZYX), + VT(R16G16B16A16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX), + VT(R16G16B16X16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX), + VT(R16G16B16A16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX), + VT(R16G16B16X16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX), VT(R16G16B16A16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), 
_T(R16G16B16X16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), VT(R16G16B16A16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX), @@ -235,11 +250,11 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { _T(L32A32_SINT, 32_32_SINT, NONE, WZYX), /* 96-bit */ - V_(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX), - V_(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX), + VT(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX), + VT(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX), V_(R32G32B32_USCALED, 32_32_32_UINT, NONE, WZYX), V_(R32G32B32_SSCALED, 32_32_32_SINT, NONE, WZYX), - V_(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX), + VT(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX), V_(R32G32B32_FIXED, 32_32_32_FIXED, NONE, WZYX), /* 128-bit */ @@ -252,6 +267,72 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { VT(R32G32B32A32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), _T(R32G32B32X32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), V_(R32G32B32A32_FIXED, 32_32_32_32_FIXED, NONE, WZYX), + + /* compressed */ + _T(ETC1_RGB8, ETC1, NONE, WZYX), + _T(ETC2_RGB8, ETC2_RGB8, NONE, WZYX), + _T(ETC2_SRGB8, ETC2_RGB8, NONE, WZYX), + _T(ETC2_RGB8A1, ETC2_RGB8A1, NONE, WZYX), + _T(ETC2_SRGB8A1, ETC2_RGB8A1, NONE, WZYX), + _T(ETC2_RGBA8, ETC2_RGBA8, NONE, WZYX), + _T(ETC2_SRGBA8, ETC2_RGBA8, NONE, WZYX), + _T(ETC2_R11_UNORM, ETC2_R11_UNORM, NONE, WZYX), + _T(ETC2_R11_SNORM, ETC2_R11_SNORM, NONE, WZYX), + _T(ETC2_RG11_UNORM, ETC2_RG11_UNORM, NONE, WZYX), + _T(ETC2_RG11_SNORM, ETC2_RG11_SNORM, NONE, WZYX), + + _T(DXT1_RGB, DXT1, NONE, WZYX), + _T(DXT1_SRGB, DXT1, NONE, WZYX), + _T(DXT1_RGBA, DXT1, NONE, WZYX), + _T(DXT1_SRGBA, DXT1, NONE, WZYX), + _T(DXT3_RGBA, DXT3, NONE, WZYX), + _T(DXT3_SRGBA, DXT3, NONE, WZYX), + _T(DXT5_RGBA, DXT5, NONE, WZYX), + _T(DXT5_SRGBA, DXT5, NONE, WZYX), + + _T(BPTC_RGBA_UNORM, BPTC, NONE, WZYX), + _T(BPTC_SRGBA, BPTC, NONE, WZYX), + _T(BPTC_RGB_FLOAT, BPTC_FLOAT, NONE, WZYX), + _T(BPTC_RGB_UFLOAT, BPTC_UFLOAT, NONE, WZYX), + + _T(RGTC1_UNORM, RGTC1_UNORM, 
NONE, WZYX), + _T(RGTC1_SNORM, RGTC1_SNORM, NONE, WZYX), + _T(RGTC2_UNORM, RGTC2_UNORM, NONE, WZYX), + _T(RGTC2_SNORM, RGTC2_SNORM, NONE, WZYX), + _T(LATC1_UNORM, RGTC1_UNORM, NONE, WZYX), + _T(LATC1_SNORM, RGTC1_SNORM, NONE, WZYX), + _T(LATC2_UNORM, RGTC2_UNORM, NONE, WZYX), + _T(LATC2_SNORM, RGTC2_SNORM, NONE, WZYX), + + _T(ASTC_4x4, ASTC_4x4, NONE, WZYX), + _T(ASTC_5x4, ASTC_5x4, NONE, WZYX), + _T(ASTC_5x5, ASTC_5x5, NONE, WZYX), + _T(ASTC_6x5, ASTC_6x5, NONE, WZYX), + _T(ASTC_6x6, ASTC_6x6, NONE, WZYX), + _T(ASTC_8x5, ASTC_8x5, NONE, WZYX), + _T(ASTC_8x6, ASTC_8x6, NONE, WZYX), + _T(ASTC_8x8, ASTC_8x8, NONE, WZYX), + _T(ASTC_10x5, ASTC_10x5, NONE, WZYX), + _T(ASTC_10x6, ASTC_10x6, NONE, WZYX), + _T(ASTC_10x8, ASTC_10x8, NONE, WZYX), + _T(ASTC_10x10, ASTC_10x10, NONE, WZYX), + _T(ASTC_12x10, ASTC_12x10, NONE, WZYX), + _T(ASTC_12x12, ASTC_12x12, NONE, WZYX), + + _T(ASTC_4x4_SRGB, ASTC_4x4, NONE, WZYX), + _T(ASTC_5x4_SRGB, ASTC_5x4, NONE, WZYX), + _T(ASTC_5x5_SRGB, ASTC_5x5, NONE, WZYX), + _T(ASTC_6x5_SRGB, ASTC_6x5, NONE, WZYX), + _T(ASTC_6x6_SRGB, ASTC_6x6, NONE, WZYX), + _T(ASTC_8x5_SRGB, ASTC_8x5, NONE, WZYX), + _T(ASTC_8x6_SRGB, ASTC_8x6, NONE, WZYX), + _T(ASTC_8x8_SRGB, ASTC_8x8, NONE, WZYX), + _T(ASTC_10x5_SRGB, ASTC_10x5, NONE, WZYX), + _T(ASTC_10x6_SRGB, ASTC_10x6, NONE, WZYX), + _T(ASTC_10x8_SRGB, ASTC_10x8, NONE, WZYX), + _T(ASTC_10x10_SRGB, ASTC_10x10, NONE, WZYX), + _T(ASTC_12x10_SRGB, ASTC_12x10, NONE, WZYX), + _T(ASTC_12x12_SRGB, ASTC_12x12, NONE, WZYX), }; /* convert pipe format to vertex buffer format: */ @@ -295,11 +376,15 @@ fd4_pipe2fetchsize(enum pipe_format format) if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) format = PIPE_FORMAT_Z32_FLOAT; - switch (util_format_get_blocksizebits(format)) { + if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC) + return TFETCH4_16_BYTE; + + switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) { case 8: return TFETCH4_1_BYTE; case 16: return TFETCH4_2_BYTE; 
case 32: return TFETCH4_4_BYTE; case 64: return TFETCH4_8_BYTE; + case 96: return TFETCH4_1_BYTE; /* Does this matter? */ case 128: return TFETCH4_16_BYTE; default: debug_printf("Unknown block size for format %s: %d\n", diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c index 3f8bbf3a124..221608127b4 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c @@ -347,8 +347,7 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0; OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); - OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR | - A4XX_RB_MRT_CONTROL_B11 | + OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) | A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf)); OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index e3d5dabab4c..3df13543148 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -245,13 +245,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); } - /* adjust regids for alpha output formats. there is no alpha render - * format, so it's just treated like red - */ - for (i = 0; i < nr; i++) - if (util_format_is_alpha(pipe_surface_format(bufs[i]))) - color_regid[i] += 3; - /* TODO get these dynamically: */ face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); coord_regid = s[FS].v->frag_coord ? 
regid(0,0) : regid(63,0); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c index dc7e98b149d..7456c63febe 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c @@ -77,6 +77,13 @@ fd4_rasterizer_state_create(struct pipe_context *pctx, so->gras_su_mode_control = A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0); + so->pc_prim_vtx_cntl2 = + A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) | + A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back)); + + if (cso->fill_front != PIPE_POLYGON_MODE_FILL || + cso->fill_back != PIPE_POLYGON_MODE_FILL) + so->pc_prim_vtx_cntl2 |= A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE; if (cso->cull_face & PIPE_FACE_FRONT) so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT; @@ -90,5 +97,10 @@ fd4_rasterizer_state_create(struct pipe_context *pctx, if (cso->offset_tri) so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET; + if (!cso->depth_clip) + so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE; + if (cso->clip_halfz) + so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z; + return so; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h index 64e81a9983b..b56a04da6a8 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h @@ -42,6 +42,7 @@ struct fd4_rasterizer_stateobj { uint32_t gras_su_mode_control; uint32_t gras_cl_clip_cntl; uint32_t pc_prim_vtx_cntl; + uint32_t pc_prim_vtx_cntl2; }; static inline struct fd4_rasterizer_stateobj * diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c index d8ea414f300..b2a69cca56c 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c +++ 
b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c @@ -57,6 +57,8 @@ fd4_screen_is_format_supported(struct pipe_screen *pscreen, } if ((usage & PIPE_BIND_SAMPLER_VIEW) && + (target == PIPE_BUFFER || + util_format_get_blocksize(format) != 12) && (fd4_pipe2tex(format) != ~0)) { retval |= PIPE_BIND_SAMPLER_VIEW; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c index dbff5a738fd..0eba75577b0 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c @@ -124,9 +124,11 @@ fd4_sampler_state_create(struct pipe_context *pctx, so->texsamp1 = // COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) | + COND(!cso->seamless_cube_map, A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS); if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { + so->texsamp0 |= A4XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias); so->texsamp1 |= A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) | A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod); @@ -210,8 +212,8 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, { struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view); struct fd_resource *rsc = fd_resource(prsc); - unsigned lvl = fd_sampler_first_level(cso); - unsigned miplevels = fd_sampler_last_level(cso) - lvl; + unsigned lvl, layers; + uint32_t sz2 = 0; if (!so) return NULL; @@ -223,39 +225,65 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, so->base.context = pctx; so->texconst0 = - A4XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) | + A4XX_TEX_CONST_0_TYPE(tex_type(cso->target)) | A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(cso->format)) | - A4XX_TEX_CONST_0_MIPLVLS(miplevels) | fd4_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); if (util_format_is_srgb(cso->format)) so->texconst0 |= A4XX_TEX_CONST_0_SRGB; - so->texconst1 = - 
A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | - A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); - so->texconst2 = - A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) | - A4XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp); + if (cso->target == PIPE_BUFFER) { + unsigned elements = cso->u.buf.last_element - + cso->u.buf.first_element + 1; + lvl = 0; + so->texconst1 = + A4XX_TEX_CONST_1_WIDTH(elements) | + A4XX_TEX_CONST_1_HEIGHT(1); + so->texconst2 = + A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) | + A4XX_TEX_CONST_2_PITCH(elements * rsc->cpp); + so->offset = cso->u.buf.first_element * + util_format_get_blocksize(cso->format); + } else { + unsigned miplevels; - switch (prsc->target) { + lvl = fd_sampler_first_level(cso); + miplevels = fd_sampler_last_level(cso) - lvl; + layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1; + + so->texconst0 |= A4XX_TEX_CONST_0_MIPLVLS(miplevels); + so->texconst1 = + A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | + A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); + so->texconst2 = + A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) | + A4XX_TEX_CONST_2_PITCH( + util_format_get_nblocksx( + cso->format, rsc->slices[lvl].pitch) * rsc->cpp); + so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer); + } + + switch (cso->target) { case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_2D_ARRAY: so->texconst3 = - A4XX_TEX_CONST_3_DEPTH(prsc->array_size) | + A4XX_TEX_CONST_3_DEPTH(layers) | A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size); break; case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE_ARRAY: so->texconst3 = - A4XX_TEX_CONST_3_DEPTH(prsc->array_size / 6) | + A4XX_TEX_CONST_3_DEPTH(layers / 6) | A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size); break; case PIPE_TEXTURE_3D: so->texconst3 = A4XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) | - A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[0].size0); + A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[lvl].size0); + while (lvl < 
cso->u.tex.last_level && sz2 != rsc->slices[lvl+1].size0) + sz2 = rsc->slices[++lvl].size0; + so->texconst4 = A4XX_TEX_CONST_4_LAYERSZ(sz2); break; default: so->texconst3 = 0x00000000; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h index 31955770a85..6ca34ade60d 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h @@ -51,7 +51,8 @@ fd4_sampler_stateobj(struct pipe_sampler_state *samp) struct fd4_pipe_sampler_view { struct pipe_sampler_view base; - uint32_t texconst0, texconst1, texconst2, texconst3, textconst4; + uint32_t texconst0, texconst1, texconst2, texconst3, texconst4; + uint32_t offset; }; static inline struct fd4_pipe_sampler_view * diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h index ca3d2ac3fca..0e0f0e65e9b 100644 --- a/src/gallium/drivers/freedreno/adreno_common.xml.h +++ b/src/gallium/drivers/freedreno/adreno_common.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 
2013-2015 by the following authors: @@ -119,6 +119,25 @@ enum adreno_rb_copy_control_mode { RB_COPY_DEPTH_STENCIL = 5, }; +enum a3xx_rop_code { + ROP_CLEAR = 0, + ROP_NOR = 1, + ROP_AND_INVERTED = 2, + ROP_COPY_INVERTED = 3, + ROP_AND_REVERSE = 4, + ROP_INVERT = 5, + ROP_XOR = 6, + ROP_NAND = 7, + ROP_AND = 8, + ROP_EQUIV = 9, + ROP_NOOP = 10, + ROP_OR_INVERTED = 11, + ROP_COPY = 12, + ROP_OR_REVERSE = 13, + ROP_OR = 14, + ROP_SET = 15, +}; + enum a3xx_render_mode { RB_RENDERING_PASS = 0, RB_TILING_PASS = 1, diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h index f095e3061b2..4aabc086607 100644 --- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h +++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 61c4c6d6e24..571c8142bf7 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ 
b/src/gallium/drivers/freedreno/freedreno_context.h @@ -359,6 +359,10 @@ struct fd_context { struct fd_streamout_stateobj streamout; struct pipe_clip_state ucp; + struct pipe_query *cond_query; + bool cond_cond; /* inverted rendering condition */ + uint cond_mode; + /* GMEM/tile handling fxns: */ void (*emit_tile_init)(struct fd_context *ctx); void (*emit_tile_prep)(struct fd_context *ctx, struct fd_tile *tile); diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index 7bf3343f43a..bf803cc77bc 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -88,6 +88,10 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) return; } + /* TODO: push down the region versions into the tiles */ + if (!fd_render_condition_check(pctx)) + return; + /* emulate unsupported primitives: */ if (!fd_supported_prim(ctx, info->mode)) { if (ctx->streamout.num_targets > 0) @@ -220,6 +224,10 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, unsigned cleared_buffers; int i; + /* TODO: push down the region versions into the tiles */ + if (!fd_render_condition_check(pctx)) + return; + /* for bookkeeping about which buffers have been cleared (and thus * can fully or partially skip mem2gmem) we need to ignore buffers * that have already had a draw, in case apps do silly things like diff --git a/src/gallium/drivers/freedreno/freedreno_query.c b/src/gallium/drivers/freedreno/freedreno_query.c index db2683c9b6f..b87e8250719 100644 --- a/src/gallium/drivers/freedreno/freedreno_query.c +++ b/src/gallium/drivers/freedreno/freedreno_query.c @@ -81,6 +81,16 @@ fd_get_query_result(struct pipe_context *pctx, struct pipe_query *pq, return q->funcs->get_query_result(fd_context(pctx), q, wait, result); } +static void +fd_render_condition(struct pipe_context *pctx, struct pipe_query *pq, + boolean condition, uint mode) +{ + struct fd_context *ctx = fd_context(pctx); 
+ ctx->cond_query = pq; + ctx->cond_cond = condition; + ctx->cond_mode = mode; +} + static int fd_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, struct pipe_driver_query_info *info) @@ -118,4 +128,5 @@ fd_query_context_init(struct pipe_context *pctx) pctx->begin_query = fd_begin_query; pctx->end_query = fd_end_query; pctx->get_query_result = fd_get_query_result; + pctx->render_condition = fd_render_condition; } diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index 98de0969cab..63ca9e30620 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -27,6 +27,7 @@ */ #include "util/u_format.h" +#include "util/u_format_rgtc.h" #include "util/u_format_zs.h" #include "util/u_inlines.h" #include "util/u_transfer.h" @@ -111,11 +112,19 @@ realloc_bo(struct fd_resource *rsc, uint32_t size) util_range_set_empty(&rsc->valid_buffer_range); } -/* Currently this is only used for flushing Z32_S8 texture transfers, but - * eventually it should handle everything. 
- */ +static unsigned +fd_resource_layer_offset(struct fd_resource *rsc, + struct fd_resource_slice *slice, + unsigned layer) +{ + if (rsc->layer_first) + return layer * rsc->layer_size; + else + return layer * slice->size0; +} + static void -fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) +fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box) { struct fd_resource *rsc = fd_resource(trans->base.resource); struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level); @@ -123,13 +132,12 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) enum pipe_format format = trans->base.resource->format; float *depth = fd_bo_map(rsc->bo) + slice->offset + + fd_resource_layer_offset(rsc, slice, trans->base.box.z) + (trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4; uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset + + fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) + (trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x; - assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || - format == PIPE_FORMAT_X32_S8X24_UINT); - if (format != PIPE_FORMAT_X32_S8X24_UINT) util_format_z32_float_s8x24_uint_unpack_z_float( depth, slice->pitch * 4, @@ -142,6 +150,73 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) box->width, box->height); } +static void +fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box) +{ + struct fd_resource *rsc = fd_resource(trans->base.resource); + struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level); + enum pipe_format format = trans->base.resource->format; + + uint8_t *data = fd_bo_map(rsc->bo) + slice->offset + + fd_resource_layer_offset(rsc, slice, trans->base.box.z) + + ((trans->base.box.y + box->y) * slice->pitch + + trans->base.box.x + box->x) * rsc->cpp; + + uint8_t *source = trans->staging + + 
util_format_get_nblocksy(format, box->y) * trans->base.stride + + util_format_get_stride(format, box->x); + + switch (format) { + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: + util_format_rgtc1_unorm_unpack_rgba_8unorm( + data, slice->pitch * rsc->cpp, + source, trans->base.stride, + box->width, box->height); + break; + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC2_UNORM: + case PIPE_FORMAT_LATC2_SNORM: + util_format_rgtc2_unorm_unpack_rgba_8unorm( + data, slice->pitch * rsc->cpp, + source, trans->base.stride, + box->width, box->height); + break; + default: + assert(!"Unexpected format\n"); + break; + } +} + +static void +fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) +{ + enum pipe_format format = trans->base.resource->format; + + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + case PIPE_FORMAT_X32_S8X24_UINT: + fd_resource_flush_z32s8(trans, box); + break; + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: + case PIPE_FORMAT_LATC2_UNORM: + case PIPE_FORMAT_LATC2_SNORM: + fd_resource_flush_rgtc(trans, box); + break; + default: + assert(!"Unexpected staging transfer type"); + break; + } +} + static void fd_resource_transfer_flush_region(struct pipe_context *pctx, struct pipe_transfer *ptrans, const struct pipe_box *box) @@ -267,20 +342,15 @@ fd_resource_transfer_map(struct pipe_context *pctx, return NULL; } - if (rsc->layer_first) { - offset = slice->offset + - box->y / util_format_get_blockheight(format) * ptrans->stride + - box->x / util_format_get_blockwidth(format) * rsc->cpp + - box->z * rsc->layer_size; - } else { - offset = slice->offset + - box->y / util_format_get_blockheight(format) * ptrans->stride + - box->x / util_format_get_blockwidth(format) * rsc->cpp 
+ - box->z * slice->size0; - } + offset = slice->offset + + box->y / util_format_get_blockheight(format) * ptrans->stride + + box->x / util_format_get_blockwidth(format) * rsc->cpp + + fd_resource_layer_offset(rsc, slice, box->z); if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || prsc->format == PIPE_FORMAT_X32_S8X24_UINT) { + assert(trans->base.box.depth == 1); + trans->base.stride = trans->base.box.width * rsc->cpp * 2; trans->staging = malloc(trans->base.stride * trans->base.box.height); if (!trans->staging) @@ -298,8 +368,10 @@ fd_resource_transfer_map(struct pipe_context *pctx, goto fail; float *depth = (float *)(buf + slice->offset + + fd_resource_layer_offset(rsc, slice, box->z) + box->y * slice->pitch * 4 + box->x * 4); uint8_t *stencil = sbuf + sslice->offset + + fd_resource_layer_offset(rsc->stencil, sslice, box->z) + box->y * sslice->pitch + box->x; if (format != PIPE_FORMAT_X32_S8X24_UINT) @@ -316,6 +388,54 @@ fd_resource_transfer_map(struct pipe_context *pctx, buf = trans->staging; offset = 0; + } else if (rsc->internal_format != format && + util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) { + assert(trans->base.box.depth == 1); + + trans->base.stride = util_format_get_stride( + format, trans->base.box.width); + trans->staging = malloc( + util_format_get_2d_size(format, trans->base.stride, + trans->base.box.height)); + if (!trans->staging) + goto fail; + + /* if we're not discarding the whole range (or resource), we must copy + * the real data in. 
+ */ + if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | + PIPE_TRANSFER_DISCARD_RANGE))) { + uint8_t *rgba8 = (uint8_t *)buf + slice->offset + + fd_resource_layer_offset(rsc, slice, box->z) + + box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp; + + switch (format) { + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: + util_format_rgtc1_unorm_pack_rgba_8unorm( + trans->staging, trans->base.stride, + rgba8, slice->pitch * rsc->cpp, + box->width, box->height); + break; + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC2_UNORM: + case PIPE_FORMAT_LATC2_SNORM: + util_format_rgtc2_unorm_pack_rgba_8unorm( + trans->staging, trans->base.stride, + rgba8, slice->pitch * rsc->cpp, + box->width, box->height); + break; + default: + assert(!"Unexpected format"); + break; + } + } + + buf = trans->staging; + offset = 0; } *pptrans = ptrans; @@ -361,9 +481,10 @@ static const struct u_resource_vtbl fd_resource_vtbl = { }; static uint32_t -setup_slices(struct fd_resource *rsc, uint32_t alignment) +setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format) { struct pipe_resource *prsc = &rsc->base.b; + enum util_format_layout layout = util_format_description(format)->layout; uint32_t level, size = 0; uint32_t width = prsc->width0; uint32_t height = prsc->height0; @@ -377,9 +498,13 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment) struct fd_resource_slice *slice = fd_resource_slice(rsc, level); uint32_t blocks; - slice->pitch = width = align(width, 32); + if (layout == UTIL_FORMAT_LAYOUT_ASTC) + slice->pitch = width = + util_align_npot(width, 32 * util_format_get_blockwidth(format)); + else + slice->pitch = width = align(width, 32); slice->offset = size; - blocks = util_format_get_nblocks(prsc->format, width, height); + blocks = util_format_get_nblocks(format, width, height); /* 1d array and 2d array textures must all have the same 
layer size * for each miplevel on a3xx. 3d textures can have different layer * sizes for high levels, but the hw auto-sizer is buggy (or at least @@ -430,11 +555,12 @@ fd_resource_create(struct pipe_screen *pscreen, { struct fd_resource *rsc = CALLOC_STRUCT(fd_resource); struct pipe_resource *prsc = &rsc->base.b; - uint32_t size; + enum pipe_format format = tmpl->format; + uint32_t size, alignment; DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, " "nr_samples=%u, usage=%u, bind=%x, flags=%x", - tmpl->target, util_format_name(tmpl->format), + tmpl->target, util_format_name(format), tmpl->width0, tmpl->height0, tmpl->depth0, tmpl->array_size, tmpl->last_level, tmpl->nr_samples, tmpl->usage, tmpl->bind, tmpl->flags); @@ -451,13 +577,18 @@ fd_resource_create(struct pipe_screen *pscreen, util_range_init(&rsc->valid_buffer_range); rsc->base.vtbl = &fd_resource_vtbl; - if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) - rsc->cpp = util_format_get_blocksize(PIPE_FORMAT_Z32_FLOAT); - else - rsc->cpp = util_format_get_blocksize(tmpl->format); + + if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + format = PIPE_FORMAT_Z32_FLOAT; + else if (fd_screen(pscreen)->gpu_id < 400 && + util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) + format = PIPE_FORMAT_R8G8B8A8_UNORM; + rsc->internal_format = format; + rsc->cpp = util_format_get_blocksize(format); assert(rsc->cpp); + alignment = slice_alignment(pscreen, tmpl); if (is_a4xx(fd_screen(pscreen))) { switch (tmpl->target) { case PIPE_TEXTURE_3D: @@ -465,11 +596,12 @@ fd_resource_create(struct pipe_screen *pscreen, break; default: rsc->layer_first = true; + alignment = 1; break; } } - size = setup_slices(rsc, slice_alignment(pscreen, tmpl)); + size = setup_slices(rsc, alignment, format); if (rsc->layer_first) { rsc->layer_size = align(size, 4096); @@ -548,7 +680,7 @@ fail: return NULL; } -static void fd_blitter_pipe_begin(struct fd_context *ctx); +static void fd_blitter_pipe_begin(struct 
fd_context *ctx, bool render_cond); static void fd_blitter_pipe_end(struct fd_context *ctx); /** @@ -570,7 +702,7 @@ fd_blitter_pipe_copy_region(struct fd_context *ctx, if (!util_blitter_is_copy_supported(ctx->blitter, dst, src)) return false; - fd_blitter_pipe_begin(ctx); + fd_blitter_pipe_begin(ctx, false); util_blitter_copy_texture(ctx->blitter, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box); @@ -612,6 +744,25 @@ fd_resource_copy_region(struct pipe_context *pctx, src, src_level, src_box); } +bool +fd_render_condition_check(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + if (!ctx->cond_query) + return true; + + union pipe_query_result res = { 0 }; + bool wait = + ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT && + ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT; + + if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res)) + return (bool)res.u64 != ctx->cond_cond; + + return true; +} + /** * Optimal hardware path for blitting pixels. * Scaling, format conversion, up- and downsampling (resolve) are allowed. 
@@ -630,6 +781,9 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) return; } + if (info.render_condition_enable && !fd_render_condition_check(pctx)) + return; + if (util_try_blit_via_copy_region(pctx, &info)) { return; /* done */ } @@ -646,13 +800,13 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) return; } - fd_blitter_pipe_begin(ctx); + fd_blitter_pipe_begin(ctx, info.render_condition_enable); util_blitter_blit(ctx->blitter, &info); fd_blitter_pipe_end(ctx); } static void -fd_blitter_pipe_begin(struct fd_context *ctx) +fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond) { util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb); util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx); @@ -673,6 +827,9 @@ fd_blitter_pipe_begin(struct fd_context *ctx) (void **)ctx->fragtex.samplers); util_blitter_save_fragment_sampler_views(ctx->blitter, ctx->fragtex.num_textures, ctx->fragtex.textures); + if (!render_cond) + util_blitter_save_render_condition(ctx->blitter, + ctx->cond_query, ctx->cond_cond, ctx->cond_mode); fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_BLIT); } diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h index 7549becaa1f..9a9b0d08244 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.h +++ b/src/gallium/drivers/freedreno/freedreno_resource.h @@ -73,6 +73,7 @@ struct fd_resource { struct u_resource base; struct fd_bo *bo; uint32_t cpp; + enum pipe_format internal_format; bool layer_first; /* see above description */ uint32_t layer_size; struct fd_resource_slice slices[MAX_MIP_LEVELS]; @@ -135,4 +136,6 @@ fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer) void fd_resource_screen_init(struct pipe_screen *pscreen); void fd_resource_context_init(struct pipe_context *pctx); +bool fd_render_condition_check(struct pipe_context *pctx); + #endif /* FREEDRENO_RESOURCE_H_ */ 
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 56d1834ef9c..5bbe4016a2a 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -160,11 +160,9 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_TGSI_TEXCOORD: case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: - case PIPE_CAP_CONDITIONAL_RENDER: case PIPE_CAP_TEXTURE_MULTISAMPLE: case PIPE_CAP_TEXTURE_BARRIER: case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_START_INSTANCE: case PIPE_CAP_COMPUTE: return 0; @@ -176,27 +174,31 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: + case PIPE_CAP_FAKE_SW_MSAA: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_CLIP_HALFZ: return is_a3xx(screen) || is_a4xx(screen); case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: - /* ignoring first/last_element.. but I guess that should be - * easy to add.. - */ + if (is_a3xx(screen)) return 16; + if (is_a4xx(screen)) return 32; return 0; case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: - /* I think 32k on a4xx.. and we could possibly emulate more - * by pretending 2d/rect textures and splitting high bits - * of index into 2nd dimension.. + /* We could possibly emulate more by pretending 2d/rect textures and + * splitting high bits of index into 2nd dimension.. 
*/ - return 16383; - - case PIPE_CAP_DEPTH_CLIP_DISABLE: - case PIPE_CAP_CLIP_HALFZ: - case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: - return is_a3xx(screen); + if (is_a3xx(screen)) return 8192; + if (is_a4xx(screen)) return 16384; + return 0; case PIPE_CAP_TEXTURE_FLOAT_LINEAR: case PIPE_CAP_CUBE_MAP_ARRAY: + case PIPE_CAP_START_INSTANCE: + case PIPE_CAP_SAMPLER_VIEW_TARGET: + case PIPE_CAP_TEXTURE_QUERY_LOD: return is_a4xx(screen); case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: @@ -205,7 +207,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_GLSL_FEATURE_LEVEL: if (glsl120) return 120; - return is_ir3(screen) ? 130 : 120; + return is_ir3(screen) ? 140 : 120; /* Unsupported features. */ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: @@ -220,15 +222,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: case PIPE_CAP_TEXTURE_GATHER_SM5: - case PIPE_CAP_FAKE_SW_MSAA: - case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: - case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: - case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c index 04e4643b4c9..f5611abaec8 100644 --- a/src/gallium/drivers/freedreno/freedreno_texture.c +++ b/src/gallium/drivers/freedreno/freedreno_texture.c @@ -197,33 +197,15 @@ fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr, continue; const struct util_format_channel_description *chan = - &desc->channel[desc->swizzle[j]]; - int size = chan->size; - - /* The Z16 texture format we use seems to look in the - * 32-bit border color slots 
- */ - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) - size = 32; - - /* Formats like R11G11B10 or RGB9_E5 don't specify - * per-channel sizes properly. - */ - if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER) - size = 16; - - if (chan->pure_integer && size > 16) - bcolor32[desc->swizzle[j] + 4] = - sampler->border_color.i[j]; - else if (size > 16) - bcolor32[desc->swizzle[j]] = - fui(sampler->border_color.f[j]); - else if (chan->pure_integer) - bcolor[desc->swizzle[j] + 8] = - sampler->border_color.i[j]; - else + &desc->channel[desc->swizzle[j]]; + if (chan->pure_integer) { + bcolor32[desc->swizzle[j] + 4] = sampler->border_color.i[j]; + bcolor[desc->swizzle[j] + 8] = sampler->border_color.i[j]; + } else { + bcolor32[desc->swizzle[j]] = fui(sampler->border_color.f[j]); bcolor[desc->swizzle[j]] = - util_float_to_half(sampler->border_color.f[j]); + util_float_to_half(sampler->border_color.f[j]); + } } } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 157dc73a3c6..156bb0be247 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1177,6 +1177,33 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu) dst[0] = ir3_SEL_B32(b, src[1], 0, ir3_b2n(b, src[0]), 0, src[2], 0); break; + case nir_op_bit_count: + dst[0] = ir3_CBITS_B(b, src[0], 0); + break; + case nir_op_ifind_msb: { + struct ir3_instruction *cmp; + dst[0] = ir3_CLZ_S(b, src[0], 0); + cmp = ir3_CMPS_S(b, dst[0], 0, create_immed(b, 0), 0); + cmp->cat2.condition = IR3_COND_GE; + dst[0] = ir3_SEL_B32(b, + ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0, + cmp, 0, dst[0], 0); + break; + } + case nir_op_ufind_msb: + dst[0] = ir3_CLZ_B(b, src[0], 0); + dst[0] = ir3_SEL_B32(b, + ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0, + src[0], 0, dst[0], 0); + break; + case nir_op_find_lsb: + dst[0] = ir3_BFREV_B(b, src[0], 0); + dst[0] = ir3_CLZ_B(b, dst[0], 0); + 
break; + case nir_op_bitfield_reverse: + dst[0] = ir3_BFREV_B(b, src[0], 0); + break; + default: compile_error(ctx, "Unhandled ALU op: %s\n", nir_op_infos[alu->op].name); @@ -1547,10 +1574,10 @@ tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp) unreachable("bad sampler_dim"); } - if (tex->is_shadow) + if (tex->is_shadow && tex->op != nir_texop_lod) flags |= IR3_INSTR_S; - if (tex->is_array) + if (tex->is_array && tex->op != nir_texop_lod) flags |= IR3_INSTR_A; *flagsp = flags; @@ -1618,12 +1645,13 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) case nir_texop_txl: opc = OPC_SAML; break; case nir_texop_txd: opc = OPC_SAMGQ; break; case nir_texop_txf: opc = OPC_ISAML; break; + case nir_texop_lod: opc = OPC_GETLOD; break; case nir_texop_txf_ms: case nir_texop_txs: - case nir_texop_lod: case nir_texop_tg4: case nir_texop_query_levels: case nir_texop_texture_samples: + case nir_texop_samples_identical: compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op); return; } @@ -1665,10 +1693,10 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) src0[nsrc0++] = create_immed(b, fui(0.5)); } - if (tex->is_shadow) + if (tex->is_shadow && tex->op != nir_texop_lod) src0[nsrc0++] = compare; - if (tex->is_array) + if (tex->is_array && tex->op != nir_texop_lod) src0[nsrc0++] = coord[coords]; if (has_proj) { @@ -1717,7 +1745,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) case nir_type_int: type = TYPE_S32; break; - case nir_type_unsigned: + case nir_type_uint: case nir_type_bool: type = TYPE_U32; break; @@ -1725,12 +1753,26 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) unreachable("bad dest_type"); } + if (opc == OPC_GETLOD) + type = TYPE_U32; + sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW, flags, tex->sampler_index, tex->sampler_index, create_collect(b, src0, nsrc0), create_collect(b, src1, nsrc1)); split_dest(b, dst, sam, 4); + + /* GETLOD returns results in 4.8 fixed point */ + if (opc == OPC_GETLOD) { + struct 
ir3_instruction *factor = create_immed(b, fui(1.0 / 256)); + + compile_assert(ctx, tex->dest_type == nir_type_float); + for (i = 0; i < 2; i++) { + dst[i] = ir3_MUL_F(b, ir3_COV(b, dst[i], TYPE_U32, TYPE_F32), 0, + factor, 0); + } + } } static void @@ -1889,6 +1931,8 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr) case nir_texop_query_levels: emit_tex_query_levels(ctx, tex); break; + case nir_texop_samples_identical: + unreachable("nir_texop_samples_identical"); default: emit_tex(ctx, tex); break; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 7e2c27d9765..5d1cccb0daa 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -166,7 +166,9 @@ struct ir3_shader_variant { } outputs[16 + 2]; /* +POSITION +PSIZE */ bool writes_pos, writes_psize; - /* vertices/inputs: */ + /* attributes (VS) / varyings (FS): + * Note that sysval's should come *after* normal inputs. 
+ */ unsigned inputs_count; struct { uint8_t slot; @@ -229,7 +231,7 @@ struct ir3_shader { struct ir3_compiler *compiler; - struct pipe_context *pctx; + struct pipe_context *pctx; /* TODO replace w/ pipe_screen */ const struct tgsi_token *tokens; struct pipe_stream_output_info stream_output; diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 83f81135590..31a93659647 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -64,6 +64,8 @@ NV50_C_SOURCES := \ nv50/nv50_3ddefs.xml.h \ nv50/nv50_3d.xml.h \ nv50/nv50_blit.h \ + nv50/nv50_compute.c \ + nv50/nv50_compute.xml.h \ nv50/nv50_context.c \ nv50/nv50_context.h \ nv50/nv50_defs.xml.h \ @@ -76,6 +78,10 @@ NV50_C_SOURCES := \ nv50/nv50_query.h \ nv50/nv50_query_hw.c \ nv50/nv50_query_hw.h \ + nv50/nv50_query_hw_metric.c \ + nv50/nv50_query_hw_metric.h \ + nv50/nv50_query_hw_sm.c \ + nv50/nv50_query_hw_sm.h \ nv50/nv50_resource.c \ nv50/nv50_resource.h \ nv50/nv50_screen.c \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 2a13e1086a0..9f84de03a4a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -2357,6 +2357,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn) case OP_PFETCH: emitPFETCH(insn); break; + case OP_AFETCH: + emitAFETCH(insn); + break; case OP_EMIT: case OP_RESTART: emitOUT(insn); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 7859c8e79bd..41d2cc9167c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -1573,10 +1573,28 @@ SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval) Instruction *st; if (slot->reg.file == FILE_MEMORY_LOCAL) { - st = 
new_Instruction(func, OP_STORE, ty); - st->setSrc(0, slot); - st->setSrc(1, lval); lval->noSpill = 1; + if (ty != TYPE_B96) { + st = new_Instruction(func, OP_STORE, ty); + st->setSrc(0, slot); + st->setSrc(1, lval); + } else { + st = new_Instruction(func, OP_SPLIT, ty); + st->setSrc(0, lval); + for (int d = 0; d < lval->reg.size / 4; ++d) + st->setDef(d, new_LValue(func, FILE_GPR)); + + for (int d = lval->reg.size / 4 - 1; d >= 0; --d) { + Value *tmp = cloneShallow(func, slot); + tmp->reg.size = 4; + tmp->reg.data.offset += 4 * d; + + Instruction *s = new_Instruction(func, OP_STORE, TYPE_U32); + s->setSrc(0, tmp); + s->setSrc(1, st->getDef(d)); + defi->bb->insertAfter(defi, s); + } + } } else { st = new_Instruction(func, OP_CVT, ty); st->setDef(0, slot); @@ -1596,7 +1614,27 @@ SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot) Instruction *ld; if (slot->reg.file == FILE_MEMORY_LOCAL) { lval->noSpill = 1; - ld = new_Instruction(func, OP_LOAD, ty); + if (ty != TYPE_B96) { + ld = new_Instruction(func, OP_LOAD, ty); + } else { + ld = new_Instruction(func, OP_MERGE, ty); + for (int d = 0; d < lval->reg.size / 4; ++d) { + Value *tmp = cloneShallow(func, slot); + LValue *val; + tmp->reg.size = 4; + tmp->reg.data.offset += 4 * d; + + Instruction *l = new_Instruction(func, OP_LOAD, TYPE_U32); + l->setDef(0, (val = new_LValue(func, FILE_GPR))); + l->setSrc(0, tmp); + usei->bb->insertBefore(usei, l); + ld->setSrc(d, val); + val->noSpill = 1; + } + ld->setDef(0, lval); + usei->bb->insertBefore(usei, ld); + return lval; + } } else { ld = new_Instruction(func, OP_CVT, ty); } diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c index 68e69beb08f..1695553d793 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.c +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -657,8 +657,8 @@ nouveau_buffer_create(struct pipe_screen *pscreen, if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | 
PIPE_RESOURCE_FLAG_MAP_COHERENT)) { buffer->domain = NOUVEAU_BO_GART; - } else if (buffer->base.bind & - (screen->vidmem_bindings & screen->sysmem_bindings)) { + } else if (buffer->base.bind == 0 || (buffer->base.bind & + (screen->vidmem_bindings & screen->sysmem_bindings))) { switch (buffer->base.usage) { case PIPE_USAGE_DEFAULT: case PIPE_USAGE_IMMUTABLE: @@ -685,6 +685,10 @@ nouveau_buffer_create(struct pipe_screen *pscreen, if (buffer->base.bind & screen->sysmem_bindings) buffer->domain = NOUVEAU_BO_GART; } + /* There can be very special situations where we want non-gpu-mapped + * buffers, but never through this interface. + */ + assert(buffer->domain); ret = nouveau_buffer_allocate(screen, buffer, buffer->domain); if (ret == false) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c new file mode 100644 index 00000000000..6d23fd66945 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c @@ -0,0 +1,320 @@ +/* + * Copyright 2012 Francisco Jerez + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "nv50/nv50_context.h" +#include "nv50/nv50_compute.xml.h" + +#include "codegen/nv50_ir_driver.h" + +int +nv50_screen_compute_setup(struct nv50_screen *screen, + struct nouveau_pushbuf *push) +{ + struct nouveau_device *dev = screen->base.device; + struct nouveau_object *chan = screen->base.channel; + struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data; + unsigned obj_class; + int i, ret; + + switch (dev->chipset & 0xf0) { + case 0x50: + case 0x80: + case 0x90: + obj_class = NV50_COMPUTE_CLASS; + break; + case 0xa0: + switch (dev->chipset) { + case 0xa3: + case 0xa5: + case 0xa8: + obj_class = NVA3_COMPUTE_CLASS; + break; + default: + obj_class = NV50_COMPUTE_CLASS; + break; + } + break; + default: + NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); + return -1; + } + + ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0, + &screen->compute); + if (ret) + return ret; + + BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->compute->handle); + + BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->stack_bo->offset); + PUSH_DATA (push, screen->stack_bo->offset); + BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1); + PUSH_DATA (push, 4); + + BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1); + PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED); + BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1); + PUSH_DATA 
(push, 0x100); + BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1); + PUSH_DATA (push, fifo->vram); + + for (i = 0; i < 15; i++) { + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1); + PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); + } + + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1); + PUSH_DATA (push, ~0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1); + PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); + + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1); + PUSH_DATA (push, 7); + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1); + PUSH_DATA (push, 7); + BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1); + PUSH_DATA (push, 0x54); + BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset); + PUSH_DATA (push, screen->txc->offset); + PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset + 65536); + PUSH_DATA (push, screen->txc->offset + 65536); + PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1); + PUSH_DATA (push, fifo->vram); + + 
BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->tls_bo->offset + 65536); + PUSH_DATA (push, screen->tls_bo->offset + 65536); + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1); + PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2)); + + return 0; +} + +static bool +nv50_compute_validate_program(struct nv50_context *nv50) +{ + struct nv50_program *prog = nv50->compprog; + + if (prog->mem) + return true; + + if (!prog->translated) { + prog->translated = nv50_program_translate( + prog, nv50->screen->base.device->chipset, &nv50->base.debug); + if (!prog->translated) + return false; + } + if (unlikely(!prog->code_size)) + return false; + + if (likely(prog->code_size)) { + if (nv50_program_upload_code(nv50, prog)) { + struct nouveau_pushbuf *push = nv50->base.pushbuf; + BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1); + PUSH_DATA (push, 0); + return true; + } + } + return false; +} + +static void +nv50_compute_validate_globals(struct nv50_context *nv50) +{ + unsigned i; + + for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *); + ++i) { + struct pipe_resource *res = *util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, i); + if (res) + nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL, + nv04_resource(res), NOUVEAU_BO_RDWR); + } +} + +static bool +nv50_compute_state_validate(struct nv50_context *nv50) +{ + if (!nv50_compute_validate_program(nv50)) + return false; + + if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS) + nv50_compute_validate_globals(nv50); + + /* TODO: validate textures, samplers, surfaces */ + + nv50_bufctx_fence(nv50->bufctx_cp, false); + + nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_cp); + if (unlikely(nouveau_pushbuf_validate(nv50->base.pushbuf))) + return false; + if (unlikely(nv50->state.flushed)) + nv50_bufctx_fence(nv50->bufctx_cp, true); + + return 
true; +} + +static void +nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input) +{ + struct nv50_screen *screen = nv50->screen; + struct nouveau_pushbuf *push = screen->base.pushbuf; + unsigned size = align(nv50->compprog->parm_size, 0x4); + + BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1); + PUSH_DATA (push, (size / 4) << 8); + + if (size) { + struct nouveau_mm_allocation *mm; + struct nouveau_bo *bo = NULL; + unsigned offset; + + mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset); + assert(mm); + + nouveau_bo_map(bo, 0, screen->base.client); + memcpy(bo->map + offset, input, size); + + nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + nouveau_pushbuf_bufctx(push, nv50->bufctx); + nouveau_pushbuf_validate(push); + + BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4); + nouveau_pushbuf_data(push, bo, offset, size); + + nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm); + nouveau_bo_ref(NULL, &bo); + nouveau_bufctx_reset(nv50->bufctx, 0); + } +} + +static uint32_t +nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label) +{ + struct nv50_program *prog = nv50->compprog; + const struct nv50_ir_prog_symbol *syms = + (const struct nv50_ir_prog_symbol *)prog->cp.syms; + unsigned i; + + for (i = 0; i < prog->cp.num_syms; ++i) { + if (syms[i].label == label) + return prog->code_base + syms[i].offset; + } + return prog->code_base; /* no symbols or symbol not found */ +} + +void +nv50_launch_grid(struct pipe_context *pipe, + const uint *block_layout, const uint *grid_layout, + uint32_t label, const void *input) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + unsigned block_size = block_layout[0] * block_layout[1] * block_layout[2]; + struct nv50_program *cp = nv50->compprog; + bool ret; + + ret = !nv50_compute_state_validate(nv50); + if (ret) { + NOUVEAU_ERR("Failed to launch grid !\n"); + return; + } + + 
nv50_compute_upload_input(nv50, input); + + BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1); + PUSH_DATA (push, nv50_compute_find_symbol(nv50, label)); + + BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1); + PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40)); + BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1); + PUSH_DATA (push, cp->max_gpr); + + /* grid/block setup */ + BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2); + PUSH_DATA (push, block_layout[1] << 16 | block_layout[0]); + PUSH_DATA (push, block_layout[2]); + BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1); + PUSH_DATA (push, 1 << 16 | block_size); + BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1); + PUSH_DATA (push, grid_layout[1] << 16 | grid_layout[0]); + BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1); + PUSH_DATA (push, 1); + + /* kernel launching */ + BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + + /* bind a compute shader clobbers fragment shader state */ + nv50->dirty |= NV50_NEW_FRAGPROG; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h new file mode 100644 index 00000000000..268d11253b6 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h @@ -0,0 +1,444 @@ +#ifndef NV50_COMPUTE_XML +#define NV50_COMPUTE_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/envytools/envytools/ +git clone https://github.com/envytools/envytools.git + +The rules-ng-ng source files this header was generated from are: +- rnndb/graph/g80_compute.xml ( 14027 bytes, from 2015-02-14 02:01:36) +- rnndb/copyright.xml ( 6456 bytes, from 2015-02-14 02:01:36) +- rnndb/nvchipsets.xml ( 2833 bytes, from 2015-04-28 16:28:33) +- rnndb/fifo/nv_object.xml ( 15390 bytes, from 2015-04-22 20:36:09) +- rnndb/g80_defs.xml ( 18210 bytes, from 2015-10-19 20:49:59) + +Copyright (C) 2006-2015 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- Ilia Mirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- 
sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + +#define NV50_COMPUTE_DMA_NOTIFY 0x00000180 + +#define NV50_COMPUTE_DMA_GLOBAL 0x000001a0 + +#define NV50_COMPUTE_DMA_QUERY 0x000001a4 + +#define NV50_COMPUTE_DMA_LOCAL 0x000001b8 + +#define NV50_COMPUTE_DMA_STACK 0x000001bc + +#define NV50_COMPUTE_DMA_CODE_CB 0x000001c0 + +#define NV50_COMPUTE_DMA_TSC 0x000001c4 + +#define NV50_COMPUTE_DMA_TIC 0x000001c8 + +#define NV50_COMPUTE_DMA_TEXTURE 0x000001cc + +#define NV50_COMPUTE_UNK0200 0x00000200 +#define NV50_COMPUTE_UNK0200_UNK1__MASK 0x0000ffff +#define NV50_COMPUTE_UNK0200_UNK1__SHIFT 0 +#define NV50_COMPUTE_UNK0200_UNK2__MASK 0x00ff0000 +#define NV50_COMPUTE_UNK0200_UNK2__SHIFT 16 + +#define NV50_COMPUTE_UNK0204 0x00000204 + +#define NV50_COMPUTE_UNK0208 0x00000208 + +#define NV50_COMPUTE_UNK020C 0x0000020c + +#define NV50_COMPUTE_CP_ADDRESS_HIGH 0x00000210 + +#define NV50_COMPUTE_CP_ADDRESS_LOW 0x00000214 + +#define NV50_COMPUTE_STACK_ADDRESS_HIGH 0x00000218 + +#define NV50_COMPUTE_STACK_ADDRESS_LOW 0x0000021c + +#define NV50_COMPUTE_STACK_SIZE_LOG 0x00000220 + +#define NV50_COMPUTE_CALL_LIMIT_LOG 0x00000224 + +#define NV50_COMPUTE_UNK0228 0x00000228 +#define NV50_COMPUTE_UNK0228_UNK0 0x00000001 +#define NV50_COMPUTE_UNK0228_UNK4__MASK 0x00000ff0 +#define NV50_COMPUTE_UNK0228_UNK4__SHIFT 4 +#define NV50_COMPUTE_UNK0228_UNK12__MASK 0x000ff000 +#define NV50_COMPUTE_UNK0228_UNK12__SHIFT 12 + +#define NV50_COMPUTE_TSC_ADDRESS_HIGH 0x0000022c + +#define NV50_COMPUTE_TSC_ADDRESS_LOW 0x00000230 +#define NV50_COMPUTE_TSC_ADDRESS_LOW__ALIGN 0x00000020 + +#define NV50_COMPUTE_TSC_LIMIT 0x00000234 +#define NV50_COMPUTE_TSC_LIMIT__MAX 0x00001fff + +#define NV50_COMPUTE_CB_ADDR 0x00000238 +#define NV50_COMPUTE_CB_ADDR_ID__MASK 0x003fff00 +#define NV50_COMPUTE_CB_ADDR_ID__SHIFT 8 +#define NV50_COMPUTE_CB_ADDR_BUFFER__MASK 0x0000007f +#define NV50_COMPUTE_CB_ADDR_BUFFER__SHIFT 0 + +#define NV50_COMPUTE_CB_DATA(i0) (0x0000023c + 0x4*(i0)) +#define NV50_COMPUTE_CB_DATA__ESIZE 0x00000004 +#define 
NV50_COMPUTE_CB_DATA__LEN 0x00000010 + +#define NV50_COMPUTE_TSC_FLUSH 0x0000027c +#define NV50_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001 +#define NV50_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NV50_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NV50_COMPUTE_TIC_FLUSH 0x00000280 +#define NV50_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001 +#define NV50_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NV50_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4 + +#define NV50_COMPUTE_DELAY1 0x00000284 + +#define NV50_COMPUTE_WATCHDOG_TIMER 0x00000288 + +#define NV50_COMPUTE_DELAY2 0x0000028c + +#define NV50_COMPUTE_UNK0290 0x00000290 + +#define NV50_COMPUTE_LOCAL_ADDRESS_HIGH 0x00000294 + +#define NV50_COMPUTE_LOCAL_ADDRESS_LOW 0x00000298 +#define NV50_COMPUTE_LOCAL_ADDRESS_LOW__ALIGN 0x00000100 + +#define NV50_COMPUTE_LOCAL_SIZE_LOG 0x0000029c + +#define NV50_COMPUTE_UNK02A0 0x000002a0 + +#define NV50_COMPUTE_CB_DEF_ADDRESS_HIGH 0x000002a4 + +#define NV50_COMPUTE_CB_DEF_ADDRESS_LOW 0x000002a8 + +#define NV50_COMPUTE_CB_DEF_SET 0x000002ac +#define NV50_COMPUTE_CB_DEF_SET_SIZE__MASK 0x0000ffff +#define NV50_COMPUTE_CB_DEF_SET_SIZE__SHIFT 0 +#define NV50_COMPUTE_CB_DEF_SET_BUFFER__MASK 0x007f0000 +#define NV50_COMPUTE_CB_DEF_SET_BUFFER__SHIFT 16 + +#define NV50_COMPUTE_UNK02B0 0x000002b0 + +#define NV50_COMPUTE_BLOCK_ALLOC 0x000002b4 +#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__MASK 0x0000ffff +#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__SHIFT 0 +#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__MASK 0x00ff0000 +#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__SHIFT 16 + +#define NV50_COMPUTE_LANES32_ENABLE 0x000002b8 + +#define NV50_COMPUTE_UNK02BC 0x000002bc +#define NV50_COMPUTE_UNK02BC_UNK1__MASK 0x00000007 +#define NV50_COMPUTE_UNK02BC_UNK1__SHIFT 0 +#define NV50_COMPUTE_UNK02BC_UNK2__MASK 0x00000070 +#define NV50_COMPUTE_UNK02BC_UNK2__SHIFT 4 + +#define NV50_COMPUTE_CP_REG_ALLOC_TEMP 0x000002c0 + +#define NV50_COMPUTE_TIC_ADDRESS_HIGH 0x000002c4 + +#define NV50_COMPUTE_TIC_ADDRESS_LOW 0x000002c8 + +#define 
NV50_COMPUTE_TIC_LIMIT 0x000002cc + +#define NV50_COMPUTE_MP_PM_SET(i0) (0x000002d0 + 0x4*(i0)) +#define NV50_COMPUTE_MP_PM_SET__ESIZE 0x00000004 +#define NV50_COMPUTE_MP_PM_SET__LEN 0x00000004 + +#define NV50_COMPUTE_MP_PM_CONTROL(i0) (0x000002e0 + 0x4*(i0)) +#define NV50_COMPUTE_MP_PM_CONTROL__ESIZE 0x00000004 +#define NV50_COMPUTE_MP_PM_CONTROL__LEN 0x00000004 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE__MASK 0x00000001 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE__SHIFT 0 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP 0x00000000 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP_PULSE 0x00000001 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__MASK 0x00000070 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__SHIFT 4 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK0 0x00000000 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK1 0x00000010 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK2 0x00000020 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK3 0x00000030 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK4 0x00000040 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK5 0x00000050 +#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__MASK 0x00ffff00 +#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__SHIFT 8 +#define NV50_COMPUTE_MP_PM_CONTROL_SIG__MASK 0xff000000 +#define NV50_COMPUTE_MP_PM_CONTROL_SIG__SHIFT 24 + +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE 0x000002f0 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_0 0x00000001 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_1 0x00000002 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_2 0x00000004 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_3 0x00000008 + +#define NV50_COMPUTE_UNK02F4 0x000002f4 + +#define NV50_COMPUTE_BLOCKDIM_LATCH 0x000002f8 + +#define NV50_COMPUTE_LOCAL_WARPS_LOG_ALLOC 0x000002fc + +#define NV50_COMPUTE_LOCAL_WARPS_NO_CLAMP 0x00000300 + +#define NV50_COMPUTE_STACK_WARPS_LOG_ALLOC 0x00000304 + +#define NV50_COMPUTE_STACK_WARPS_NO_CLAMP 0x00000308 + +#define NV50_COMPUTE_UNK030C 0x0000030c + +#define NV50_COMPUTE_QUERY_ADDRESS_HIGH 0x00000310 + +#define 
NV50_COMPUTE_QUERY_ADDRESS_LOW 0x00000314 + +#define NV50_COMPUTE_QUERY_SEQUENCE 0x00000318 + +#define NV50_COMPUTE_QUERY_GET 0x0000031c +#define NV50_COMPUTE_QUERY_GET_INTR 0x00000200 +#define NV50_COMPUTE_QUERY_GET_SHORT 0x00008000 + +#define NV50_COMPUTE_COND_ADDRESS_HIGH 0x00000320 + +#define NV50_COMPUTE_COND_ADDRESS_LOW 0x00000324 + +#define NV50_COMPUTE_COND_MODE 0x00000328 +#define NV50_COMPUTE_COND_MODE_NEVER 0x00000000 +#define NV50_COMPUTE_COND_MODE_ALWAYS 0x00000001 +#define NV50_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002 +#define NV50_COMPUTE_COND_MODE_EQUAL 0x00000003 +#define NV50_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004 + +#define NV50_COMPUTE_UNK032C 0x0000032c + +#define NV50_COMPUTE_UNK0330 0x00000330 + +#define NV50_COMPUTE_UNK0334(i0) (0x00000334 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0334__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0334__LEN 0x00000003 + +#define NV50_COMPUTE_UNK0340(i0) (0x00000340 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0340__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0340__LEN 0x00000002 + +#define NV50_COMPUTE_UNK0348(i0) (0x00000348 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0348__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0348__LEN 0x00000002 + +#define NV50_COMPUTE_UNK0350(i0) (0x00000350 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0350__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0350__LEN 0x00000002 + +#define NV50_COMPUTE_UNK0358 0x00000358 + +#define NV50_COMPUTE_UNK035C 0x0000035c + +#define NV50_COMPUTE_UNK0360 0x00000360 +#define NV50_COMPUTE_UNK0360_UNK0__MASK 0x000000f0 +#define NV50_COMPUTE_UNK0360_UNK0__SHIFT 4 +#define NV50_COMPUTE_UNK0360_UNK1__MASK 0x00000f00 +#define NV50_COMPUTE_UNK0360_UNK1__SHIFT 8 + +#define NV50_COMPUTE_UNK0364 0x00000364 + +#define NV50_COMPUTE_LAUNCH 0x00000368 + +#define NV50_COMPUTE_UNK036C 0x0000036c + +#define NV50_COMPUTE_UNK0370 0x00000370 + +#define NV50_COMPUTE_USER_PARAM_COUNT 0x00000374 +#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__MASK 0x000000ff +#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__SHIFT 
0 +#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MASK 0x0000ff00 +#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__SHIFT 8 +#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MAX 0x00000040 + +#define NV50_COMPUTE_LINKED_TSC 0x00000378 + +#define NV50_COMPUTE_UNK037C 0x0000037c +#define NV50_COMPUTE_UNK037C_ALWAYS_DERIV 0x00000001 +#define NV50_COMPUTE_UNK037C_UNK16 0x00010000 + +#define NV50_COMPUTE_CODE_CB_FLUSH 0x00000380 + +#define NV50_COMPUTE_UNK0384 0x00000384 + +#define NV50_COMPUTE_GRIDID 0x00000388 + +#define NV50_COMPUTE_UNK038C(i0) (0x0000038c + 0x4*(i0)) +#define NV50_COMPUTE_UNK038C__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK038C__LEN 0x00000003 + +#define NV50_COMPUTE_WRCACHE_FLUSH 0x00000398 + +#define NV50_COMPUTE_UNK039C(i0) (0x0000039c + 0x4*(i0)) +#define NV50_COMPUTE_UNK039C__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK039C__LEN 0x00000002 + +#define NV50_COMPUTE_GRIDDIM 0x000003a4 +#define NV50_COMPUTE_GRIDDIM_X__MASK 0x0000ffff +#define NV50_COMPUTE_GRIDDIM_X__SHIFT 0 +#define NV50_COMPUTE_GRIDDIM_Y__MASK 0xffff0000 +#define NV50_COMPUTE_GRIDDIM_Y__SHIFT 16 + +#define NV50_COMPUTE_SHARED_SIZE 0x000003a8 +#define NV50_COMPUTE_SHARED_SIZE__MAX 0x00004000 +#define NV50_COMPUTE_SHARED_SIZE__ALIGN 0x00000040 + +#define NV50_COMPUTE_BLOCKDIM_XY 0x000003ac +#define NV50_COMPUTE_BLOCKDIM_XY_X__MASK 0x0000ffff +#define NV50_COMPUTE_BLOCKDIM_XY_X__SHIFT 0 +#define NV50_COMPUTE_BLOCKDIM_XY_Y__MASK 0xffff0000 +#define NV50_COMPUTE_BLOCKDIM_XY_Y__SHIFT 16 + +#define NV50_COMPUTE_BLOCKDIM_Z 0x000003b0 +#define NV50_COMPUTE_BLOCKDIM_Z__MIN 0x00000001 +#define NV50_COMPUTE_BLOCKDIM_Z__MAX 0x00000040 + +#define NV50_COMPUTE_CP_START_ID 0x000003b4 + +#define NV50_COMPUTE_REG_MODE 0x000003b8 +#define NV50_COMPUTE_REG_MODE_PACKED 0x00000001 +#define NV50_COMPUTE_REG_MODE_STRIPED 0x00000002 + +#define NV50_COMPUTE_TEX_LIMITS 0x000003bc +#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f +#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0 +#define 
NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000 +#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007 + +#define NV50_COMPUTE_BIND_TSC 0x000003c0 +#define NV50_COMPUTE_BIND_TSC_VALID 0x00000001 +#define NV50_COMPUTE_BIND_TSC_SAMPLER__MASK 0x000000f0 +#define NV50_COMPUTE_BIND_TSC_SAMPLER__SHIFT 4 +#define NV50_COMPUTE_BIND_TSC_TSC__MASK 0x001ff000 +#define NV50_COMPUTE_BIND_TSC_TSC__SHIFT 12 + +#define NV50_COMPUTE_BIND_TIC 0x000003c4 +#define NV50_COMPUTE_BIND_TIC_VALID 0x00000001 +#define NV50_COMPUTE_BIND_TIC_TEXTURE__MASK 0x000001fe +#define NV50_COMPUTE_BIND_TIC_TEXTURE__SHIFT 1 +#define NV50_COMPUTE_BIND_TIC_TIC__MASK 0x7ffffe00 +#define NV50_COMPUTE_BIND_TIC_TIC__SHIFT 9 + +#define NV50_COMPUTE_SET_PROGRAM_CB 0x000003c8 +#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__MASK 0x00000f00 +#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__SHIFT 8 +#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__MASK 0x0007f000 +#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__SHIFT 12 +#define NV50_COMPUTE_SET_PROGRAM_CB_VALID 0x000000ff + +#define NV50_COMPUTE_UNK03CC 0x000003cc + +#define NV50_COMPUTE_TEX_CACHE_CTL 0x000003d0 +#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__MASK 0x00000030 +#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__SHIFT 4 + +#define NV50_COMPUTE_UNK03D4 0x000003d4 + +#define NV50_COMPUTE_UNK03D8 0x000003d8 + +#define NV50_COMPUTE_UNK03DC 0x000003dc + +#define NV50_COMPUTE_UNK03E0 0x000003e0 + +#define NV50_COMPUTE_UNK03E4 0x000003e4 + +#define NVA3_COMPUTE_TEX_MISC 0x000003e8 +#define NVA3_COMPUTE_TEX_MISC_UNK1 0x00000001 +#define NVA3_COMPUTE_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000002 + +#define NV50_COMPUTE_GLOBAL(i0) (0x00000400 + 0x20*(i0)) +#define NV50_COMPUTE_GLOBAL__ESIZE 0x00000020 +#define NV50_COMPUTE_GLOBAL__LEN 0x00000010 + 
+#define NV50_COMPUTE_GLOBAL_ADDRESS_HIGH(i0) (0x00000400 + 0x20*(i0)) + +#define NV50_COMPUTE_GLOBAL_ADDRESS_LOW(i0) (0x00000404 + 0x20*(i0)) + +#define NV50_COMPUTE_GLOBAL_PITCH(i0) (0x00000408 + 0x20*(i0)) +#define NV50_COMPUTE_GLOBAL_PITCH__MAX 0x00800000 +#define NV50_COMPUTE_GLOBAL_PITCH__ALIGN 0x00000100 + +#define NV50_COMPUTE_GLOBAL_LIMIT(i0) (0x0000040c + 0x20*(i0)) + +#define NV50_COMPUTE_GLOBAL_MODE(i0) (0x00000410 + 0x20*(i0)) +#define NV50_COMPUTE_GLOBAL_MODE_LINEAR 0x00000001 +#define NV50_COMPUTE_GLOBAL_MODE_UNK1__MASK 0x000000f0 +#define NV50_COMPUTE_GLOBAL_MODE_UNK1__SHIFT 4 +#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__MASK 0x00000f00 +#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__SHIFT 8 + +#define NV50_COMPUTE_USER_PARAM(i0) (0x00000600 + 0x4*(i0)) +#define NV50_COMPUTE_USER_PARAM__ESIZE 0x00000004 +#define NV50_COMPUTE_USER_PARAM__LEN 0x00000040 + +#define NV50_COMPUTE_UNK0700(i0) (0x00000700 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0700__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0700__LEN 0x00000010 + + +#endif /* NV50_COMPUTE_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c index 7867c2df7f3..4874b77b1e1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c @@ -113,6 +113,7 @@ nv50_context_unreference_resources(struct nv50_context *nv50) nouveau_bufctx_del(&nv50->bufctx_3d); nouveau_bufctx_del(&nv50->bufctx); + nouveau_bufctx_del(&nv50->bufctx_cp); util_unreference_framebuffer_state(&nv50->framebuffer); @@ -131,6 +132,14 @@ nv50_context_unreference_resources(struct nv50_context *nv50) if (!nv50->constbuf[s][i].user) pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL); } + + for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *); + ++i) { + struct pipe_resource **res = util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, i); + pipe_resource_reference(res, NULL); + } + 
util_dynarray_fini(&nv50->global_residents); } static void @@ -159,9 +168,10 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, int ref) { struct nv50_context *nv50 = nv50_context(&ctx->pipe); + unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER; unsigned s, i; - if (res->bind & PIPE_BIND_RENDER_TARGET) { + if (bind & PIPE_BIND_RENDER_TARGET) { assert(nv50->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS); for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) { if (nv50->framebuffer.cbufs[i] && @@ -173,7 +183,7 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, } } } - if (res->bind & PIPE_BIND_DEPTH_STENCIL) { + if (bind & PIPE_BIND_DEPTH_STENCIL) { if (nv50->framebuffer.zsbuf && nv50->framebuffer.zsbuf->texture == res) { nv50->dirty |= NV50_NEW_FRAMEBUFFER; @@ -183,11 +193,11 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, } } - if (res->bind & (PIPE_BIND_VERTEX_BUFFER | - PIPE_BIND_INDEX_BUFFER | - PIPE_BIND_CONSTANT_BUFFER | - PIPE_BIND_STREAM_OUTPUT | - PIPE_BIND_SAMPLER_VIEW)) { + if (bind & (PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER | + PIPE_BIND_STREAM_OUTPUT | + PIPE_BIND_SAMPLER_VIEW)) { assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS); for (i = 0; i < nv50->num_vtxbufs; ++i) { @@ -263,10 +273,13 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) nv50->base.pushbuf = screen->base.pushbuf; nv50->base.client = screen->base.client; - ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_COUNT, - &nv50->bufctx_3d); + ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx); + if (!ret) + ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_3D_COUNT, + &nv50->bufctx_3d); if (!ret) - ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx); + ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_CP_COUNT, + &nv50->bufctx_cp); if (ret) goto out_err; @@ -290,6 +303,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned 
ctxflags) pipe->draw_vbo = nv50_draw_vbo; pipe->clear = nv50_clear; + pipe->launch_grid = nv50_launch_grid; pipe->flush = nv50_flush; pipe->texture_barrier = nv50_texture_barrier; @@ -335,19 +349,30 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms); BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc); BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo); + if (screen->compute) { + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code); + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc); + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->stack_bo); + } flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo); BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo); + if (screen->compute) + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo); nv50->base.scratch.bo_size = 2 << 20; + util_dynarray_init(&nv50->global_residents); + return pipe; out_err: if (nv50->bufctx_3d) nouveau_bufctx_del(&nv50->bufctx_3d); + if (nv50->bufctx_cp) + nouveau_bufctx_del(&nv50->bufctx_cp); if (nv50->bufctx) nouveau_bufctx_del(&nv50->bufctx); FREE(nv50->blit); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index fb74a9748a3..2cebcd99423 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -49,6 +49,10 @@ #define NV50_NEW_MIN_SAMPLES (1 << 22) #define NV50_NEW_CONTEXT (1 << 31) +#define NV50_NEW_CP_PROGRAM (1 << 0) +#define NV50_NEW_CP_GLOBALS (1 << 1) + +/* 3d bufctx (during draw_vbo, blit_3d) */ #define NV50_BIND_FB 0 #define NV50_BIND_VERTEX 1 #define NV50_BIND_VERTEX_TMP 2 @@ -58,7 +62,15 @@ #define NV50_BIND_SO 53 #define NV50_BIND_SCREEN 54 #define NV50_BIND_TLS 55 -#define NV50_BIND_COUNT 56 +#define NV50_BIND_3D_COUNT 56 + +/* compute bufctx (during launch_grid) */ 
+#define NV50_BIND_CP_GLOBAL 0 +#define NV50_BIND_CP_SCREEN 1 +#define NV50_BIND_CP_QUERY 2 +#define NV50_BIND_CP_COUNT 3 + +/* bufctx for other operations */ #define NV50_BIND_2D 0 #define NV50_BIND_M2MF 0 #define NV50_BIND_FENCE 1 @@ -101,8 +113,10 @@ struct nv50_context { struct nouveau_bufctx *bufctx_3d; struct nouveau_bufctx *bufctx; + struct nouveau_bufctx *bufctx_cp; uint32_t dirty; + uint32_t dirty_cp; /* dirty flags for compute state */ bool cb_dirty; struct nv50_graph_state state; @@ -115,6 +129,7 @@ struct nv50_context { struct nv50_program *vertprog; struct nv50_program *gmtyprog; struct nv50_program *fragprog; + struct nv50_program *compprog; struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS]; uint16_t constbuf_dirty[3]; @@ -163,6 +178,8 @@ struct nv50_context { uint32_t cond_condmode; /* the calculated condition */ struct nv50_blitctx *blit; + + struct util_dynarray global_residents; }; static inline struct nv50_context * @@ -302,4 +319,9 @@ struct pipe_video_buffer * nv98_video_buffer_create(struct pipe_context *pipe, const struct pipe_video_buffer *template); +/* nv50_compute.c */ +void +nv50_launch_grid(struct pipe_context *, const uint *, const uint *, + uint32_t, const void *); + #endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 89e7a338283..a4b8ddfda95 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -66,7 +66,6 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) case TGSI_SEMANTIC_VERTEXID: prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START; - prog->vp.vertexid = 1; continue; default: break; @@ -259,6 +258,8 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info) return nv50_vertprog_assign_slots(info); case PIPE_SHADER_FRAGMENT: return nv50_fragprog_assign_slots(info); + case 
PIPE_SHADER_COMPUTE: + return 0; default: return -1; } @@ -355,6 +356,9 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, prog->gp.has_layer = 0; prog->gp.has_viewport = 0; + if (prog->type == PIPE_SHADER_COMPUTE) + info->prop.cp.inputOffset = 0x10; + info->driverPriv = prog; #ifdef DEBUG @@ -378,6 +382,8 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1); prog->tls_space = info->bin.tlsSpace; + prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; + if (prog->type == PIPE_SHADER_FRAGMENT) { if (info->prop.fp.writesDepth) { prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z; @@ -401,6 +407,10 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, break; } prog->gp.vert_count = info->prop.gp.maxVertices; + } else + if (prog->type == PIPE_SHADER_COMPUTE) { + prog->cp.syms = info->bin.syms; + prog->cp.num_syms = info->bin.numSyms; } if (prog->pipe.stream_output.num_outputs) @@ -423,11 +433,13 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) struct nouveau_heap *heap; int ret; uint32_t size = align(prog->code_size, 0x40); + uint8_t prog_type; switch (prog->type) { case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break; case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break; case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break; + case PIPE_SHADER_COMPUTE: heap = nv50->screen->fp_code_heap; break; default: assert(!"invalid program type"); return false; @@ -450,7 +462,14 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) return false; } } - prog->code_base = prog->mem->start; + + if (prog->type == PIPE_SHADER_COMPUTE) { + /* CP code must be uploaded in FP code segment. 
*/ + prog_type = 1; + } else { + prog->code_base = prog->mem->start; + prog_type = prog->type; + } ret = nv50_tls_realloc(nv50->screen, prog->tls_space); if (ret < 0) { @@ -468,7 +487,7 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) false /* flatshade */); nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, - (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, + (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, NOUVEAU_BO_VRAM, prog->code_size, prog->code); BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1); @@ -489,7 +508,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) FREE(p->code); FREE(p->fixups); - + FREE(p->interps); FREE(p->so); memset(p, 0, sizeof(*p)); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h index 7a33eb11d6d..1de5122a56e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h @@ -76,9 +76,9 @@ struct nv50_program { ubyte psiz; /* output slot of point size */ ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */ ubyte edgeflag; - ubyte vertexid; ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */ ubyte clpd_nr; + bool need_vertex_id; } vp; struct { @@ -98,6 +98,13 @@ struct nv50_program { ubyte viewportid; /* hw value of viewport index output */ } gp; + struct { + uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */ + uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */ + void *syms; + unsigned num_syms; + } cp; + void *fixups; /* relocation records */ void *interps; /* interpolation records */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c index f31eaa0e314..cbef95d07f6 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_push.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c @@ -24,6 +24,10 @@ struct push_context { struct 
translate *translate; bool primitive_restart; + + bool need_vertex_id; + int32_t index_bias; + uint32_t prim; uint32_t restart_index; uint32_t instance_id; @@ -74,6 +78,11 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) size = ctx->vertex_words * nr; + if (unlikely(ctx->need_vertex_id)) { + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, *elts + ctx->index_bias); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id, @@ -107,6 +116,11 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) size = ctx->vertex_words * nr; + if (unlikely(ctx->need_vertex_id)) { + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, *elts + ctx->index_bias); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id, @@ -140,6 +154,11 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) size = ctx->vertex_words * nr; + if (unlikely(ctx->need_vertex_id)) { + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, *elts + ctx->index_bias); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id, @@ -161,10 +180,18 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) static void emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) { + uint32_t elts = 0; + while (count) { unsigned push = MIN2(count, ctx->packet_vertex_limit); unsigned size = ctx->vertex_words * push; + if (unlikely(ctx->need_vertex_id)) { + /* For non-indexed draws, gl_VertexID goes up after each vertex. 
*/ + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, elts++); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id, @@ -216,7 +243,14 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) ctx.push = nv50->base.pushbuf; ctx.translate = nv50->vertex->translate; - ctx.packet_vertex_limit = nv50->vertex->packet_vertex_limit; + + ctx.need_vertex_id = nv50->screen->base.class_3d >= NV84_3D_CLASS && + nv50->vertprog->vp.need_vertex_id && (nv50->vertex->num_elements < 32); + ctx.index_bias = info->index_bias; + + /* For indexed draws, gl_VertexID must be emitted for every vertex. */ + ctx.packet_vertex_limit = + ctx.need_vertex_id ? 1 : nv50->vertex->packet_vertex_limit; ctx.vertex_words = nv50->vertex->vertex_size; assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS); @@ -307,4 +341,10 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) ctx.instance_id++; ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } + + if (unlikely(ctx.need_vertex_id)) { + /* Reset gl_VertexID to prevent future indexed draws to be confused. 
*/ + BEGIN_NV04(ctx.push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx.push, nv50->state.index_bias); + } } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index dd9b85b7208..4cd3b615606 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -27,6 +27,8 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_query.h" #include "nv50/nv50_query_hw.h" +#include "nv50/nv50_query_hw_metric.h" +#include "nv50/nv50_query_hw_sm.h" static struct pipe_query * nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index) @@ -152,4 +154,79 @@ nv50_init_query_functions(struct nv50_context *nv50) pipe->end_query = nv50_end_query; pipe->get_query_result = nv50_get_query_result; pipe->render_condition = nv50_render_condition; + nv50->cond_condmode = NV50_3D_COND_MODE_ALWAYS; +} + +int +nv50_screen_get_driver_query_info(struct pipe_screen *pscreen, + unsigned id, + struct pipe_driver_query_info *info) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + int num_hw_queries = 0; + + num_hw_queries = nv50_hw_get_driver_query_info(screen, 0, NULL); + + if (!info) + return num_hw_queries; + + /* Init default values. 
*/ + info->name = "this_is_not_the_query_you_are_looking_for"; + info->query_type = 0xdeadd01d; + info->max_value.u64 = 0; + info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; + info->group_id = -1; + info->flags = 0; + + return nv50_hw_get_driver_query_info(screen, id, info); +} + +int +nv50_screen_get_driver_query_group_info(struct pipe_screen *pscreen, + unsigned id, + struct pipe_driver_query_group_info *info) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + int count = 0; + + if (screen->compute) + if (screen->base.class_3d >= NV84_3D_CLASS) + count += 2; + + if (!info) + return count; + + if (id == NV50_HW_SM_QUERY_GROUP) { + if (screen->compute) { + if (screen->base.class_3d >= NV84_3D_CLASS) { + info->name = "MP counters"; + + /* Because we can't expose the number of hardware counters needed + * for each different query, we don't want to allow more than one + * active query simultaneously to avoid failure when the maximum + * number of counters is reached. Note that these groups of GPU + * counters are currently only used by AMD_performance_monitor. 
+ */ + info->max_active_queries = 1; + info->num_queries = NV50_HW_SM_QUERY_COUNT; + return 1; + } + } + } else + if (id == NV50_HW_METRIC_QUERY_GROUP) { + if (screen->compute) { + if (screen->base.class_3d >= NV84_3D_CLASS) { + info->name = "Performance metrics"; + info->max_active_queries = 1; + info->num_queries = NV50_HW_METRIC_QUERY_COUNT; + return 1; + } + } + } + + /* user asked for info about non-existing query group */ + info->name = "this_is_not_the_query_group_you_are_looking_for"; + info->max_active_queries = 0; + info->num_queries = 0; + return 0; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h b/src/gallium/drivers/nouveau/nv50/nv50_query.h index d990285c857..bd4c0a386f6 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h @@ -28,6 +28,12 @@ nv50_query(struct pipe_query *pipe) return (struct nv50_query *)pipe; } +/* + * Driver queries groups: + */ +#define NV50_HW_SM_QUERY_GROUP 0 +#define NV50_HW_METRIC_QUERY_GROUP 1 + void nv50_init_query_functions(struct nv50_context *); #endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c index 945ce7abe50..b6ebbbf1010 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c @@ -25,6 +25,8 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_query_hw.h" +#include "nv50/nv50_query_hw_metric.h" +#include "nv50/nv50_query_hw_sm.h" #include "nv_object.xml.h" #define NV50_HW_QUERY_STATE_READY 0 @@ -41,7 +43,7 @@ #define NV50_HW_QUERY_ALLOC_SPACE 256 -static bool +bool nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) { @@ -122,6 +124,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q) struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_hw_query *hq = nv50_hw_query(q); + if (hq->funcs && hq->funcs->begin_query) + return hq->funcs->begin_query(nv50, hq); + /* 
For occlusion queries we have to change the storage, because a previous * query might set the initial render condition to false even *after* we re- * initialized it to true. @@ -193,6 +198,11 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q) struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_hw_query *hq = nv50_hw_query(q); + if (hq->funcs && hq->funcs->end_query) { + hq->funcs->end_query(nv50, hq); + return; + } + hq->state = NV50_HW_QUERY_STATE_ENDED; switch (q->type) { @@ -261,6 +271,9 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q, uint64_t *data64 = (uint64_t *)hq->data; int i; + if (hq->funcs && hq->funcs->get_query_result) + return hq->funcs->get_query_result(nv50, hq, wait, result); + if (hq->state != NV50_HW_QUERY_STATE_READY) nv50_hw_query_update(q); @@ -331,6 +344,18 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) struct nv50_hw_query *hq; struct nv50_query *q; + hq = nv50_hw_sm_create_query(nv50, type); + if (hq) { + hq->base.funcs = &hw_query_funcs; + return (struct nv50_query *)hq; + } + + hq = nv50_hw_metric_create_query(nv50, type); + if (hq) { + hq->base.funcs = &hw_query_funcs; + return (struct nv50_query *)hq; + } + hq = CALLOC_STRUCT(nv50_hw_query); if (!hq) return NULL; @@ -375,6 +400,26 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) return q; } +int +nv50_hw_get_driver_query_info(struct nv50_screen *screen, unsigned id, + struct pipe_driver_query_info *info) +{ + int num_hw_sm_queries = 0, num_hw_metric_queries = 0; + + num_hw_sm_queries = nv50_hw_sm_get_driver_query_info(screen, 0, NULL); + num_hw_metric_queries = + nv50_hw_metric_get_driver_query_info(screen, 0, NULL); + + if (!info) + return num_hw_sm_queries + num_hw_metric_queries; + + if (id < num_hw_sm_queries) + return nv50_hw_sm_get_driver_query_info(screen, id, info); + + return nv50_hw_metric_get_driver_query_info(screen, + id - num_hw_sm_queries, info); 
+} + void nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, struct nv50_query *q, unsigned result_offset) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h index 294c67de9a4..82ec6bd2d96 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h @@ -8,8 +8,19 @@ #define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) +struct nv50_hw_query; + +struct nv50_hw_query_funcs { + void (*destroy_query)(struct nv50_context *, struct nv50_hw_query *); + boolean (*begin_query)(struct nv50_context *, struct nv50_hw_query *); + void (*end_query)(struct nv50_context *, struct nv50_hw_query *); + boolean (*get_query_result)(struct nv50_context *, struct nv50_hw_query *, + boolean, union pipe_query_result *); +}; + struct nv50_hw_query { struct nv50_query base; + const struct nv50_hw_query_funcs *funcs; uint32_t *data; uint32_t sequence; struct nouveau_bo *bo; @@ -31,6 +42,11 @@ nv50_hw_query(struct nv50_query *q) struct nv50_query * nv50_hw_create_query(struct nv50_context *, unsigned, unsigned); +int +nv50_hw_get_driver_query_info(struct nv50_screen *, unsigned, + struct pipe_driver_query_info *); +bool +nv50_hw_query_allocate(struct nv50_context *, struct nv50_query *, int); void nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t, struct nv50_query *, unsigned); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c new file mode 100644 index 00000000000..d1bccb94193 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c @@ -0,0 +1,207 @@ +/* + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * 
the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw_metric.h" +#include "nv50/nv50_query_hw_sm.h" + +/* === PERFORMANCE MONITORING METRICS for NV84+ === */ +static const char *nv50_hw_metric_names[] = +{ + "metric-branch_efficiency", +}; + +struct nv50_hw_metric_query_cfg { + uint32_t queries[4]; + uint32_t num_queries; +}; + +#define _SM(n) NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_ ##n) +#define _M(n, c) [NV50_HW_METRIC_QUERY_##n] = c + +/* ==== Compute capability 1.1 (G84+) ==== */ +static const struct nv50_hw_metric_query_cfg +sm11_branch_efficiency = +{ + .queries[0] = _SM(BRANCH), + .queries[1] = _SM(DIVERGENT_BRANCH), + .num_queries = 2, +}; + +static const struct nv50_hw_metric_query_cfg *sm11_hw_metric_queries[] = +{ + _M(BRANCH_EFFICIENCY, &sm11_branch_efficiency), +}; + +#undef _SM +#undef _M + +static const struct nv50_hw_metric_query_cfg * +nv50_hw_metric_query_get_cfg(struct nv50_context *nv50, + struct nv50_hw_query *hq) +{ + struct nv50_query *q = &hq->base; + return sm11_hw_metric_queries[q->type - NV50_HW_METRIC_QUERY(0)]; +} + +static void +nv50_hw_metric_destroy_query(struct nv50_context *nv50, + 
struct nv50_hw_query *hq) +{ + struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq); + unsigned i; + + for (i = 0; i < hmq->num_queries; i++) + hmq->queries[i]->funcs->destroy_query(nv50, hmq->queries[i]); + FREE(hmq); +} + +static boolean +nv50_hw_metric_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq); + boolean ret = false; + unsigned i; + + for (i = 0; i < hmq->num_queries; i++) { + ret = hmq->queries[i]->funcs->begin_query(nv50, hmq->queries[i]); + if (!ret) + return ret; + } + return ret; +} + +static void +nv50_hw_metric_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq); + unsigned i; + + for (i = 0; i < hmq->num_queries; i++) + hmq->queries[i]->funcs->end_query(nv50, hmq->queries[i]); +} + +static uint64_t +sm11_hw_metric_calc_result(struct nv50_hw_query *hq, uint64_t res64[8]) +{ + switch (hq->base.type - NV50_HW_METRIC_QUERY(0)) { + case NV50_HW_METRIC_QUERY_BRANCH_EFFICIENCY: + /* (branch / (branch + divergent_branch)) * 100 */ + if (res64[0] + res64[1]) + return (res64[0] / (double)(res64[0] + res64[1])) * 100; + break; + default: + debug_printf("invalid metric type: %d\n", + hq->base.type - NV50_HW_METRIC_QUERY(0)); + break; + } + return 0; +} + +static boolean +nv50_hw_metric_get_query_result(struct nv50_context *nv50, + struct nv50_hw_query *hq, boolean wait, + union pipe_query_result *result) +{ + struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq); + union pipe_query_result results[4] = {}; + uint64_t res64[4] = {}; + boolean ret = false; + unsigned i; + + for (i = 0; i < hmq->num_queries; i++) { + ret = hmq->queries[i]->funcs->get_query_result(nv50, hmq->queries[i], + wait, &results[i]); + if (!ret) + return ret; + res64[i] = *(uint64_t *)&results[i]; + } + + *(uint64_t *)result = sm11_hw_metric_calc_result(hq, res64); + return ret; +} + +static const struct 
nv50_hw_query_funcs hw_metric_query_funcs = { + .destroy_query = nv50_hw_metric_destroy_query, + .begin_query = nv50_hw_metric_begin_query, + .end_query = nv50_hw_metric_end_query, + .get_query_result = nv50_hw_metric_get_query_result, +}; + +struct nv50_hw_query * +nv50_hw_metric_create_query(struct nv50_context *nv50, unsigned type) +{ + const struct nv50_hw_metric_query_cfg *cfg; + struct nv50_hw_metric_query *hmq; + struct nv50_hw_query *hq; + unsigned i; + + if (type < NV50_HW_METRIC_QUERY(0) || type > NV50_HW_METRIC_QUERY_LAST) + return NULL; + + hmq = CALLOC_STRUCT(nv50_hw_metric_query); + if (!hmq) + return NULL; + + hq = &hmq->base; + hq->funcs = &hw_metric_query_funcs; + hq->base.type = type; + + cfg = nv50_hw_metric_query_get_cfg(nv50, hq); + + for (i = 0; i < cfg->num_queries; i++) { + hmq->queries[i] = nv50_hw_sm_create_query(nv50, cfg->queries[i]); + if (!hmq->queries[i]) { + nv50_hw_metric_destroy_query(nv50, hq); + return NULL; + } + hmq->num_queries++; + } + + return hq; +} + +int +nv50_hw_metric_get_driver_query_info(struct nv50_screen *screen, unsigned id, + struct pipe_driver_query_info *info) +{ + int count = 0; + + if (screen->compute) + if (screen->base.class_3d >= NV84_3D_CLASS) + count += NV50_HW_METRIC_QUERY_COUNT; + + if (!info) + return count; + + if (id < count) { + if (screen->compute) { + if (screen->base.class_3d >= NV84_3D_CLASS) { + info->name = nv50_hw_metric_names[id]; + info->query_type = NV50_HW_METRIC_QUERY(id); + info->group_id = NV50_HW_METRIC_QUERY_GROUP; + return 1; + } + } + } + return 0; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h new file mode 100644 index 00000000000..f8cfc04084f --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h @@ -0,0 +1,34 @@ +#ifndef __NV50_QUERY_HW_METRIC_H__ +#define __NV50_QUERY_HW_METRIC_H__ + +#include "nv50_query_hw.h" + +struct nv50_hw_metric_query { + struct nv50_hw_query base; + 
struct nv50_hw_query *queries[4]; + unsigned num_queries; +}; + +static inline struct nv50_hw_metric_query * +nv50_hw_metric_query(struct nv50_hw_query *hq) +{ + return (struct nv50_hw_metric_query *)hq; +} + +/* + * Driver metrics queries: + */ +#define NV50_HW_METRIC_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i)) +#define NV50_HW_METRIC_QUERY_LAST NV50_HW_METRIC_QUERY(NV50_HW_METRIC_QUERY_COUNT - 1) +enum nv50_hw_metric_queries +{ + NV50_HW_METRIC_QUERY_BRANCH_EFFICIENCY = 0, + NV50_HW_METRIC_QUERY_COUNT +}; + +struct nv50_hw_query * +nv50_hw_metric_create_query(struct nv50_context *, unsigned); +int +nv50_hw_metric_get_driver_query_info(struct nv50_screen *, unsigned, + struct pipe_driver_query_info *); +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c new file mode 100644 index 00000000000..8453ce76095 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c @@ -0,0 +1,417 @@ +/* + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define NV50_PUSH_EXPLICIT_SPACE_CHECKING + +#include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw_sm.h" + +#include "nv_object.xml.h" +#include "nv50/nv50_compute.xml.h" + +/* === PERFORMANCE MONITORING COUNTERS for NV84+ === */ + +/* NOTE: intentionally using the same names as NV */ +static const char *nv50_hw_sm_query_names[] = +{ + "branch", + "divergent_branch", + "instructions", + "prof_trigger_00", + "prof_trigger_01", + "prof_trigger_02", + "prof_trigger_03", + "prof_trigger_04", + "prof_trigger_05", + "prof_trigger_06", + "prof_trigger_07", + "sm_cta_launched", + "warp_serialize", +}; + +static const uint64_t nv50_read_hw_sm_counters_code[] = +{ + /* and b32 $r0 $r0 0x0000ffff + * add b32 $c0 $r0 $r0 $r0 + * (lg $c0) ret + * mov $r0 $pm0 + * mov $r1 $pm1 + * mov $r2 $pm2 + * mov $r3 $pm3 + * mov $r4 $physid + * ld $r5 b32 s[0x10] + * ld $r6 b32 s[0x14] + * and b32 $r4 $r4 0x000f0000 + * shr u32 $r4 $r4 0x10 + * mul $r4 u24 $r4 0x14 + * add b32 $r5 $r5 $r4 + * st b32 g15[$r5] $r0 + * add b32 $r5 $r5 0x04 + * st b32 g15[$r5] $r1 + * add b32 $r5 $r5 0x04 + * st b32 g15[$r5] $r2 + * add b32 $r5 $r5 0x04 + * st b32 g15[$r5] $r3 + * add b32 $r5 $r5 0x04 + * exit st b32 g15[$r5] $r6 */ + 0x00000fffd03f0001ULL, + 0x040007c020000001ULL, + 0x0000028030000003ULL, + 0x6001078000000001ULL, + 0x6001478000000005ULL, + 0x6001878000000009ULL, + 0x6001c7800000000dULL, + 0x6000078000000011ULL, + 0x4400c78010000815ULL, + 0x4400c78010000a19ULL, + 0x0000f003d0000811ULL, + 0xe410078030100811ULL, + 0x0000000340540811ULL, + 0x0401078020000a15ULL, + 0xa0c00780d00f0a01ULL, + 0x0000000320048a15ULL, + 0xa0c00780d00f0a05ULL, + 0x0000000320048a15ULL, + 0xa0c00780d00f0a09ULL, + 0x0000000320048a15ULL, + 
0xa0c00780d00f0a0dULL, + 0x0000000320048a15ULL, + 0xa0c00781d00f0a19ULL, +}; + +struct nv50_hw_sm_counter_cfg +{ + uint32_t mode : 4; /* LOGOP, LOGOP_PULSE */ + uint32_t unit : 8; /* UNK[0-5] */ + uint32_t sig : 8; /* signal selection */ +}; + +struct nv50_hw_sm_query_cfg +{ + struct nv50_hw_sm_counter_cfg ctr[4]; + uint8_t num_counters; +}; + +#define _Q(n, m, u, s) [NV50_HW_SM_QUERY_##n] = { { { NV50_COMPUTE_MP_PM_CONTROL_MODE_##m, NV50_COMPUTE_MP_PM_CONTROL_UNIT_##u, s, }, {}, {}, {} }, 1 } + +/* ==== Compute capability 1.1 (G84+) ==== */ +static const struct nv50_hw_sm_query_cfg sm11_hw_sm_queries[] = +{ + _Q(BRANCH, LOGOP, UNK4, 0x02), + _Q(DIVERGENT_BRANCH, LOGOP, UNK4, 0x09), + _Q(INSTRUCTIONS, LOGOP, UNK4, 0x04), + _Q(PROF_TRIGGER_0, LOGOP, UNK1, 0x26), + _Q(PROF_TRIGGER_1, LOGOP, UNK1, 0x27), + _Q(PROF_TRIGGER_2, LOGOP, UNK1, 0x28), + _Q(PROF_TRIGGER_3, LOGOP, UNK1, 0x29), + _Q(PROF_TRIGGER_4, LOGOP, UNK1, 0x2a), + _Q(PROF_TRIGGER_5, LOGOP, UNK1, 0x2b), + _Q(PROF_TRIGGER_6, LOGOP, UNK1, 0x2c), + _Q(PROF_TRIGGER_7, LOGOP, UNK1, 0x2d), + _Q(SM_CTA_LAUNCHED, LOGOP, UNK1, 0x33), + _Q(WARP_SERIALIZE, LOGOP, UNK0, 0x0b), +}; + +static inline uint16_t nv50_hw_sm_get_func(uint8_t slot) +{ + switch (slot) { + case 0: return 0xaaaa; + case 1: return 0xcccc; + case 2: return 0xf0f0; + case 3: return 0xff00; + } + return 0; +} + +static const struct nv50_hw_sm_query_cfg * +nv50_hw_sm_query_get_cfg(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_query *q = &hq->base; + return &sm11_hw_sm_queries[q->type - NV50_HW_SM_QUERY(0)]; +} + +static void +nv50_hw_sm_destroy_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_query *q = &hq->base; + q->funcs->destroy_query(nv50, q); +} + +static boolean +nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_screen *screen = nv50->screen; + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq); + 
const struct nv50_hw_sm_query_cfg *cfg; + uint16_t func; + int i, c; + + cfg = nv50_hw_sm_query_get_cfg(nv50, hq); + + /* check if we have enough free counter slots */ + if (screen->pm.num_hw_sm_active + cfg->num_counters > 4) { + NOUVEAU_ERR("Not enough free MP counter slots !\n"); + return false; + } + + assert(cfg->num_counters <= 4); + PUSH_SPACE(push, 4 * 4); + + /* set sequence field to 0 (used to check if result is available) */ + for (i = 0; i < screen->MPsInTP; ++i) { + const unsigned b = (0x14 / 4) * i; + hq->data[b + 16] = 0; + } + hq->sequence++; + + for (i = 0; i < cfg->num_counters; i++) { + screen->pm.num_hw_sm_active++; + + /* find free counter slots */ + for (c = 0; c < 4; ++c) { + if (!screen->pm.mp_counter[c]) { + hsq->ctr[i] = c; + screen->pm.mp_counter[c] = hsq; + break; + } + } + + /* select func to aggregate counters */ + func = nv50_hw_sm_get_func(c); + + /* configure and reset the counter(s) */ + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1); + PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8) + | cfg->ctr[i].unit | cfg->ctr[i].mode); + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_SET(c)), 1); + PUSH_DATA (push, 0); + } + return true; +} + +static void +nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_screen *screen = nv50->screen; + struct pipe_context *pipe = &nv50->base.pipe; + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq); + uint32_t mask; + uint32_t input[3]; + const uint block[3] = { 32, 1, 1 }; + const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 }; + int c; + + if (unlikely(!screen->pm.prog)) { + struct nv50_program *prog = CALLOC_STRUCT(nv50_program); + prog->type = PIPE_SHADER_COMPUTE; + prog->translated = true; + prog->max_gpr = 7; + prog->parm_size = 8; + prog->code = (uint32_t *)nv50_read_hw_sm_counters_code; + prog->code_size = sizeof(nv50_read_hw_sm_counters_code); + screen->pm.prog = prog; + } + + /* disable all 
counting */ + PUSH_SPACE(push, 8); + for (c = 0; c < 4; c++) { + if (screen->pm.mp_counter[c]) { + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1); + PUSH_DATA (push, 0); + } + } + + /* release counters for this query */ + for (c = 0; c < 4; c++) { + if (screen->pm.mp_counter[c] == hsq) { + screen->pm.num_hw_sm_active--; + screen->pm.mp_counter[c] = NULL; + } + } + + BCTX_REFN_bo(nv50->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR, + hq->bo); + + PUSH_SPACE(push, 2); + BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + + pipe->bind_compute_state(pipe, screen->pm.prog); + input[0] = hq->bo->offset + hq->base_offset; + input[1] = hq->sequence; + pipe->launch_grid(pipe, block, grid, 0, input); + + nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY); + + /* re-active other counters */ + PUSH_SPACE(push, 8); + mask = 0; + for (c = 0; c < 4; c++) { + const struct nv50_hw_sm_query_cfg *cfg; + unsigned i; + + hsq = screen->pm.mp_counter[c]; + if (!hsq) + continue; + + cfg = nv50_hw_sm_query_get_cfg(nv50, &hsq->base); + for (i = 0; i < cfg->num_counters; i++) { + uint16_t func; + + if (mask & (1 << hsq->ctr[i])) + break; + + mask |= 1 << hsq->ctr[i]; + func = nv50_hw_sm_get_func(hsq->ctr[i]); + + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(hsq->ctr[i])), 1); + PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8) + | cfg->ctr[i].unit | cfg->ctr[i].mode); + } + } +} + +static inline bool +nv50_hw_sm_query_read_data(uint32_t count[32][4], + struct nv50_context *nv50, bool wait, + struct nv50_hw_query *hq, + const struct nv50_hw_sm_query_cfg *cfg, + unsigned mp_count) +{ + struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq); + unsigned p, c; + + for (p = 0; p < mp_count; ++p) { + const unsigned b = (0x14 / 4) * p; + + for (c = 0; c < cfg->num_counters; ++c) { + if (hq->data[b + 4] != hq->sequence) { + if (!wait) + return false; + if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->base.client)) + return false; + } + count[p][c] = 
hq->data[b + hsq->ctr[c]]; + } + } + return true; +} + +static boolean +nv50_hw_sm_get_query_result(struct nv50_context *nv50, struct nv50_hw_query *hq, + boolean wait, union pipe_query_result *result) +{ + uint32_t count[32][4]; + uint64_t value = 0; + unsigned mp_count = MIN2(nv50->screen->MPsInTP, 32); + unsigned p, c; + const struct nv50_hw_sm_query_cfg *cfg; + bool ret; + + cfg = nv50_hw_sm_query_get_cfg(nv50, hq); + + ret = nv50_hw_sm_query_read_data(count, nv50, wait, hq, cfg, mp_count); + if (!ret) + return false; + + for (c = 0; c < cfg->num_counters; ++c) + for (p = 0; p < mp_count; ++p) + value += count[p][c]; + + /* We only count a single TP, and simply multiply by the total number of + * TPs to compute result over all TPs. This is inaccurate, but enough! */ + value *= nv50->screen->TPs; + + *(uint64_t *)result = value; + return true; +} + +static const struct nv50_hw_query_funcs hw_sm_query_funcs = { + .destroy_query = nv50_hw_sm_destroy_query, + .begin_query = nv50_hw_sm_begin_query, + .end_query = nv50_hw_sm_end_query, + .get_query_result = nv50_hw_sm_get_query_result, +}; + +struct nv50_hw_query * +nv50_hw_sm_create_query(struct nv50_context *nv50, unsigned type) +{ + struct nv50_hw_sm_query *hsq; + struct nv50_hw_query *hq; + unsigned space; + + if (type < NV50_HW_SM_QUERY(0) || type > NV50_HW_SM_QUERY_LAST) + return NULL; + + hsq = CALLOC_STRUCT(nv50_hw_sm_query); + if (!hsq) + return NULL; + + hq = &hsq->base; + hq->funcs = &hw_sm_query_funcs; + hq->base.type = type; + + /* + * for each MP: + * [00] = MP.C0 + * [04] = MP.C1 + * [08] = MP.C2 + * [0c] = MP.C3 + * [10] = MP.sequence + */ + space = (4 + 1) * nv50->screen->MPsInTP * sizeof(uint32_t); + + if (!nv50_hw_query_allocate(nv50, &hq->base, space)) { + FREE(hq); + return NULL; + } + + return hq; +} + +int +nv50_hw_sm_get_driver_query_info(struct nv50_screen *screen, unsigned id, + struct pipe_driver_query_info *info) +{ + int count = 0; + + if (screen->compute) + if (screen->base.class_3d >= 
NV84_3D_CLASS) + count += NV50_HW_SM_QUERY_COUNT; + + if (!info) + return count; + + if (id < count) { + if (screen->compute) { + if (screen->base.class_3d >= NV84_3D_CLASS) { + info->name = nv50_hw_sm_query_names[id]; + info->query_type = NV50_HW_SM_QUERY(id); + info->group_id = NV50_HW_SM_QUERY_GROUP; + return 1; + } + } + } + return 0; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h new file mode 100644 index 00000000000..c1a1cd175e3 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h @@ -0,0 +1,45 @@ +#ifndef __NV50_QUERY_HW_SM_H__ +#define __NV50_QUERY_HW_SM_H__ + +#include "nv50_query_hw.h" + +struct nv50_hw_sm_query { + struct nv50_hw_query base; + uint8_t ctr[4]; +}; + +static inline struct nv50_hw_sm_query * +nv50_hw_sm_query(struct nv50_hw_query *hq) +{ + return (struct nv50_hw_sm_query *)hq; +} + +/* + * Performance counter queries: + */ +#define NV50_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i)) +#define NV50_HW_SM_QUERY_LAST NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_COUNT - 1) +enum nv50_hw_sm_queries +{ + NV50_HW_SM_QUERY_BRANCH = 0, + NV50_HW_SM_QUERY_DIVERGENT_BRANCH, + NV50_HW_SM_QUERY_INSTRUCTIONS, + NV50_HW_SM_QUERY_PROF_TRIGGER_0, + NV50_HW_SM_QUERY_PROF_TRIGGER_1, + NV50_HW_SM_QUERY_PROF_TRIGGER_2, + NV50_HW_SM_QUERY_PROF_TRIGGER_3, + NV50_HW_SM_QUERY_PROF_TRIGGER_4, + NV50_HW_SM_QUERY_PROF_TRIGGER_5, + NV50_HW_SM_QUERY_PROF_TRIGGER_6, + NV50_HW_SM_QUERY_PROF_TRIGGER_7, + NV50_HW_SM_QUERY_SM_CTA_LAUNCHED, + NV50_HW_SM_QUERY_WARP_SERIALIZE, + NV50_HW_SM_QUERY_COUNT, +}; + +struct nv50_hw_query * +nv50_hw_sm_create_query(struct nv50_context *, unsigned); +int +nv50_hw_sm_get_driver_query_info(struct nv50_screen *, unsigned, + struct pipe_driver_query_info *); +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index f47e998ab1e..1e4b75f18e0 100644 --- 
a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -41,8 +41,6 @@ #define THREADS_IN_WARP 32 -#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float)) - static boolean nv50_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, @@ -183,6 +181,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: case PIPE_CAP_SHAREABLE_SHADERS: case PIPE_CAP_CLEAR_TEXTURE: + case PIPE_CAP_COMPUTE: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: return 1; /* class_3d >= NVA0_3D_CLASS; */ @@ -212,7 +211,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: - case PIPE_CAP_COMPUTE: case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */ @@ -251,6 +249,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_VERTEX: case PIPE_SHADER_GEOMETRY: case PIPE_SHADER_FRAGMENT: + case PIPE_SHADER_COMPUTE: break; default: return 0; @@ -336,6 +335,52 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) return 0.0f; } +static int +nv50_screen_get_compute_param(struct pipe_screen *pscreen, + enum pipe_compute_cap param, void *data) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + +#define RET(x) do { \ + if (data) \ + memcpy(data, x, sizeof(x)); \ + return sizeof(x); \ +} while (0) + + switch (param) { + case PIPE_COMPUTE_CAP_GRID_DIMENSION: + RET((uint64_t []) { 2 }); + case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: + RET(((uint64_t []) { 65535, 65535 })); + case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: + RET(((uint64_t []) { 512, 512, 64 })); + case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: + RET((uint64_t []) { 512 }); + case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */ + 
RET((uint64_t []) { 1ULL << 32 }); + case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */ + RET((uint64_t []) { 16 << 10 }); + case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */ + RET((uint64_t []) { 16 << 10 }); + case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ + RET((uint64_t []) { 4096 }); + case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + RET((uint32_t []) { 32 }); + case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: + RET((uint64_t []) { 1ULL << 40 }); + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: + RET((uint32_t []) { 0 }); + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + RET((uint32_t []) { screen->mp_count }); + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: + RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */ + default: + return 0; + } + +#undef RET +} + static void nv50_screen_destroy(struct pipe_screen *pscreen) { @@ -377,6 +422,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) nouveau_object_del(&screen->tesla); nouveau_object_del(&screen->eng2d); nouveau_object_del(&screen->m2mf); + nouveau_object_del(&screen->compute); nouveau_object_del(&screen->sync); nouveau_screen_fini(&screen->base); @@ -640,7 +686,7 @@ nv50_screen_init_hwctx(struct nv50_screen *screen) BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1); PUSH_DATA (push, 0); if (screen->base.class_3d >= NV84_3D_CLASS) { - BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1); + BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1); PUSH_DATA (push, 0); } @@ -742,6 +788,9 @@ nv50_screen_create(struct nouveau_device *dev) pscreen->get_param = nv50_screen_get_param; pscreen->get_shader_param = nv50_screen_get_shader_param; pscreen->get_paramf = nv50_screen_get_paramf; + pscreen->get_compute_param = nv50_screen_get_compute_param; + pscreen->get_driver_query_info = nv50_screen_get_driver_query_info; + pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info; nv50_screen_init_resource_functions(pscreen); @@ -851,6 +900,8 @@ nv50_screen_create(struct nouveau_device *dev) screen->TPs = util_bitcount(value & 
0xffff); screen->MPsInTP = util_bitcount((value >> 24) & 0xf); + screen->mp_count = screen->TPs * screen->MPsInTP; + stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP * STACK_WARPS_ALLOC * 64 * 8; @@ -902,6 +953,12 @@ nv50_screen_create(struct nouveau_device *dev) nv50_screen_init_hwctx(screen); + ret = nv50_screen_compute_setup(screen, screen->base.pushbuf); + if (ret) { + NOUVEAU_ERR("Failed to init compute context: %d\n", ret); + goto fail; + } + nouveau_fence_new(&screen->base, &screen->base.fence.current, false); return pscreen; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index ce51f0fc254..2a4983d1020 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -23,6 +23,10 @@ struct nv50_context; #define NV50_MAX_VIEWPORTS 16 +#define NV50_MAX_GLOBALS 16 + +#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float)) + struct nv50_blitter; struct nv50_graph_state { @@ -66,6 +70,7 @@ struct nv50_screen { unsigned MPsInTP; unsigned max_tls_space; unsigned cur_tls_space; + unsigned mp_count; struct nouveau_heap *vp_code_heap; struct nouveau_heap *gp_code_heap; @@ -90,9 +95,16 @@ struct nv50_screen { struct nouveau_bo *bo; } fence; + struct { + struct nv50_program *prog; /* compute state object to read MP counters */ + struct nv50_hw_sm_query *mp_counter[4]; /* counter to query allocation */ + uint8_t num_hw_sm_active; + } pm; + struct nouveau_object *sync; struct nouveau_object *tesla; + struct nouveau_object *compute; struct nouveau_object *eng2d; struct nouveau_object *m2mf; }; @@ -103,12 +115,19 @@ nv50_screen(struct pipe_screen *screen) return (struct nv50_screen *)screen; } +int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned, + struct pipe_driver_query_info *); +int nv50_screen_get_driver_query_group_info(struct pipe_screen *, unsigned, + struct pipe_driver_query_group_info *); + bool nv50_blitter_create(struct 
nv50_screen *); void nv50_blitter_destroy(struct nv50_screen *); int nv50_screen_tic_alloc(struct nv50_screen *, void *); int nv50_screen_tsc_alloc(struct nv50_screen *, void *); +int nv50_screen_compute_setup(struct nv50_screen *, struct nouveau_pushbuf *); + static inline void nv50_resource_fence(struct nv04_resource *res, uint32_t flags) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index d27f12ca94b..b4ea08d4d13 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -792,6 +792,35 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) nv50->dirty |= NV50_NEW_GMTYPROG; } +static void * +nv50_cp_state_create(struct pipe_context *pipe, + const struct pipe_compute_state *cso) +{ + struct nv50_program *prog; + + prog = CALLOC_STRUCT(nv50_program); + if (!prog) + return NULL; + prog->type = PIPE_SHADER_COMPUTE; + + prog->cp.smem_size = cso->req_local_mem; + prog->cp.lmem_size = cso->req_private_mem; + prog->parm_size = cso->req_input_mem; + + prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog); + + return (void *)prog; +} + +static void +nv50_cp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->compprog = hwcso; + nv50->dirty_cp |= NV50_NEW_CP_PROGRAM; +} + static void nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct pipe_constant_buffer *cb) @@ -1134,6 +1163,70 @@ nv50_set_stream_output_targets(struct pipe_context *pipe, nv50->dirty |= NV50_NEW_STRMOUT; } +static void +nv50_set_compute_resources(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_surface **resources) +{ + /* TODO: bind surfaces */ +} + +static inline void +nv50_set_global_handle(uint32_t *phandle, struct pipe_resource *res) +{ + struct nv04_resource *buf = nv04_resource(res); + if (buf) { + uint64_t limit = (buf->address + 
buf->base.width0) - 1; + if (limit < (1ULL << 32)) { + *phandle = (uint32_t)buf->address; + } else { + NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: " + "resource not contained within 32-bit address space !\n"); + *phandle = 0; + } + } else { + *phandle = 0; + } +} + +static void +nv50_set_global_bindings(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_resource **resources, + uint32_t **handles) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct pipe_resource **ptr; + unsigned i; + const unsigned end = start + nr; + + if (nv50->global_residents.size <= (end * sizeof(struct pipe_resource *))) { + const unsigned old_size = nv50->global_residents.size; + const unsigned req_size = end * sizeof(struct pipe_resource *); + util_dynarray_resize(&nv50->global_residents, req_size); + memset((uint8_t *)nv50->global_residents.data + old_size, 0, + req_size - old_size); + } + + if (resources) { + ptr = util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, start); + for (i = 0; i < nr; ++i) { + pipe_resource_reference(&ptr[i], resources[i]); + nv50_set_global_handle(handles[i], resources[i]); + } + } else { + ptr = util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, start); + for (i = 0; i < nr; ++i) + pipe_resource_reference(&ptr[i], NULL); + } + + nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL); + + nv50->dirty_cp = NV50_NEW_CP_GLOBALS; +} + void nv50_init_state_functions(struct nv50_context *nv50) { @@ -1162,12 +1255,15 @@ nv50_init_state_functions(struct nv50_context *nv50) pipe->create_vs_state = nv50_vp_state_create; pipe->create_fs_state = nv50_fp_state_create; pipe->create_gs_state = nv50_gp_state_create; + pipe->create_compute_state = nv50_cp_state_create; pipe->bind_vs_state = nv50_vp_state_bind; pipe->bind_fs_state = nv50_fp_state_bind; pipe->bind_gs_state = nv50_gp_state_bind; + pipe->bind_compute_state = nv50_cp_state_bind; pipe->delete_vs_state = nv50_sp_state_delete; 
pipe->delete_fs_state = nv50_sp_state_delete; pipe->delete_gs_state = nv50_sp_state_delete; + pipe->delete_compute_state = nv50_sp_state_delete; pipe->set_blend_color = nv50_set_blend_color; pipe->set_stencil_ref = nv50_set_stencil_ref; @@ -1191,6 +1287,9 @@ nv50_init_state_functions(struct nv50_context *nv50) pipe->stream_output_target_destroy = nv50_so_target_destroy; pipe->set_stream_output_targets = nv50_set_stream_output_targets; + pipe->set_global_binding = nv50_set_global_bindings; + pipe->set_compute_resources = nv50_set_compute_resources; + nv50->sample_mask = ~0; nv50->min_samples = 1; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index b6181edf24f..02a759c23ad 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -503,8 +503,7 @@ static struct state_validate { { nv50_validate_samplers, NV50_NEW_SAMPLERS }, { nv50_stream_output_validate, NV50_NEW_STRMOUT | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, - { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS | - NV50_NEW_VERTPROG }, + { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }, { nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES }, }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index 916a7d44a31..8ba19d2cc90 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -339,12 +339,18 @@ nv50_clear_render_target(struct pipe_context *pipe, PUSH_DATA (push, (width << 16) | dstx); PUSH_DATA (push, (height << 16) | dsty); + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); + BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth); for (z = 0; z < sf->depth; ++z) { PUSH_DATA (push, 0x3c | (z << 
NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT)); } + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, nv50->cond_condmode); + nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; } @@ -415,12 +421,18 @@ nv50_clear_depth_stencil(struct pipe_context *pipe, PUSH_DATA (push, (width << 16) | dstx); PUSH_DATA (push, (height << 16) | dsty); + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); + BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth); for (z = 0; z < sf->depth; ++z) { PUSH_DATA (push, mode | (z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT)); } + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, nv50->cond_condmode); + nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; } @@ -673,6 +685,9 @@ nv50_clear_buffer(struct pipe_context *pipe, PUSH_DATA (push, (width << 16)); PUSH_DATA (push, (height << 16)); + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); + BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1); PUSH_DATA (push, 0x3c); @@ -690,6 +705,9 @@ nv50_clear_buffer(struct pipe_context *pipe, PUSH_DATA (push, 0x3c); } + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, nv50->cond_condmode); + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence); nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 9aa593f919e..85878d5fcc7 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -294,8 +294,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) uint64_t addrs[PIPE_MAX_ATTRIBS]; uint32_t limits[PIPE_MAX_ATTRIBS]; struct nouveau_pushbuf *push = nv50->base.pushbuf; - struct nv50_vertex_stateobj dummy = {}; - struct nv50_vertex_stateobj *vertex = nv50->vertex ? 
nv50->vertex : &dummy; + struct nv50_vertex_stateobj *vertex = nv50->vertex; struct pipe_vertex_buffer *vb; struct nv50_vertex_element *ve; uint32_t mask; @@ -303,14 +302,6 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) unsigned i; const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts); - /* A vertexid is not generated for inline data uploads. Have to use a - * VBO. This check must come after the vertprog has been validated, - * otherwise vertexid may be unset. - */ - assert(nv50->vertprog->translated); - if (nv50->vertprog->vp.vertexid) - nv50->vbo_push_hint = 0; - if (unlikely(vertex->need_conversion)) nv50->vbo_fifo = ~0; else @@ -487,7 +478,7 @@ nv50_draw_arrays(struct nv50_context *nv50, BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1); PUSH_DATA (push, 0); if (nv50->screen->base.class_3d >= NV84_3D_CLASS) { - BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1); + BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1); PUSH_DATA (push, 0); } nv50->state.index_bias = 0; @@ -613,7 +604,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten, BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1); PUSH_DATA (push, index_bias); if (nv50->screen->base.class_3d >= NV84_3D_CLASS) { - BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1); + BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1); PUSH_DATA (push, index_bias); } nv50->state.index_bias = index_bias; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h index 76f1b41ea70..68002305d72 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h @@ -49,6 +49,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) #define SUBC_3D(m) 3, (m) #define NV50_3D(n) SUBC_3D(NV50_3D_##n) +#define NV84_3D(n) SUBC_3D(NV84_3D_##n) #define NVA0_3D(n) SUBC_3D(NVA0_3D_##n) #define SUBC_2D(m) 4, (m) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 82ed5a1864e..162661ff2a7 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -180,9 +180,10 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, int ref) { struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe); + unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER; unsigned s, i; - if (res->bind & PIPE_BIND_RENDER_TARGET) { + if (bind & PIPE_BIND_RENDER_TARGET) { for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) { if (nvc0->framebuffer.cbufs[i] && nvc0->framebuffer.cbufs[i]->texture == res) { @@ -193,7 +194,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, } } } - if (res->bind & PIPE_BIND_DEPTH_STENCIL) { + if (bind & PIPE_BIND_DEPTH_STENCIL) { if (nvc0->framebuffer.zsbuf && nvc0->framebuffer.zsbuf->texture == res) { nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; @@ -203,12 +204,12 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, } } - if (res->bind & (PIPE_BIND_VERTEX_BUFFER | - PIPE_BIND_INDEX_BUFFER | - PIPE_BIND_CONSTANT_BUFFER | - PIPE_BIND_STREAM_OUTPUT | - PIPE_BIND_COMMAND_ARGS_BUFFER | - PIPE_BIND_SAMPLER_VIEW)) { + if (bind & (PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER | + PIPE_BIND_STREAM_OUTPUT | + PIPE_BIND_COMMAND_ARGS_BUFFER | + PIPE_BIND_SAMPLER_VIEW)) { for (i = 0; i < nvc0->num_vtxbufs; ++i) { if (nvc0->vtxbuf[i].buffer == res) { nvc0->dirty |= NVC0_NEW_ARRAYS; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index f53921092a5..d992b10a23c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -162,6 +162,7 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen, info->max_value.u64 = 0; info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; info->group_id = -1; + info->flags = 0; #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS if (id < 
num_sw_queries) @@ -200,7 +201,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, if (id == NVC0_HW_SM_QUERY_GROUP) { if (screen->compute) { info->name = "MP counters"; - info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; /* Because we can't expose the number of hardware counters needed for * each different query, we don't want to allow more than one active @@ -224,7 +224,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, if (screen->compute) { if (screen->base.class_3d < NVE4_3D_CLASS) { info->name = "Performance metrics"; - info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; info->max_active_queries = 1; info->num_queries = NVC0_HW_METRIC_QUERY_COUNT; return 1; @@ -234,7 +233,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) { info->name = "Driver statistics"; - info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU; info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT; info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT; return 1; @@ -245,7 +243,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, info->name = "this_is_not_the_query_group_you_are_looking_for"; info->max_active_queries = 0; info->num_queries = 0; - info->type = 0; return 0; } @@ -260,4 +257,5 @@ nvc0_init_query_functions(struct nvc0_context *nvc0) pipe->end_query = nvc0_end_query; pipe->get_query_result = nvc0_get_query_result; pipe->render_condition = nvc0_render_condition; + nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index 44b222e5134..7962143d45a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -1014,14 +1014,15 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) struct nvc0_program *prog = 
CALLOC_STRUCT(nvc0_program); prog->type = PIPE_SHADER_COMPUTE; prog->translated = true; - prog->num_gprs = 14; prog->parm_size = 12; if (is_nve4) { prog->code = (uint32_t *)nve4_read_hw_sm_counters_code; prog->code_size = sizeof(nve4_read_hw_sm_counters_code); + prog->num_gprs = 14; } else { prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code; prog->code_size = sizeof(nvc0_read_hw_sm_counters_code); + prog->num_gprs = 12; } screen->pm.prog = prog; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index cdb1fc1145f..6a4ae5be2ab 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -341,12 +341,16 @@ nvc0_clear_render_target(struct pipe_context *pipe, nvc0_resource_fence(res, NOUVEAU_BO_WR); } + IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); for (z = 0; z < sf->depth; ++z) { PUSH_DATA (push, 0x3c | (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT)); } + IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode); + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; } @@ -470,6 +474,8 @@ nvc0_clear_buffer(struct pipe_context *pipe, IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0); IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0); + IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c); if (width * height != elements) { @@ -486,6 +492,8 @@ nvc0_clear_buffer(struct pipe_context *pipe, IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c); } + IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode); + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence); nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr); nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; @@ -545,12 +553,16 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, PUSH_DATA (push, dst->u.tex.first_layer); IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode); + 
IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); for (z = 0; z < sf->depth; ++z) { PUSH_DATA (push, mode | (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT)); } + IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode); + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; } diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index f63790c329e..1dbad2f39e3 100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -7,12 +7,14 @@ C_SOURCES := \ r600_pipe_common.c \ r600_pipe_common.h \ r600_query.c \ + r600_query.h \ r600_streamout.c \ r600_texture.c \ radeon_uvd.c \ radeon_uvd.h \ radeon_vce_40_2_2.c \ radeon_vce_50.c \ + radeon_vce_52.c \ radeon_vce.c \ radeon_vce.h \ radeon_video.c \ diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 3599692a857..7464f677398 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -27,6 +27,7 @@ #include "r600_pipe_common.h" #include "r600_cs.h" #include "tgsi/tgsi_parse.h" +#include "util/list.h" #include "util/u_draw_quad.h" #include "util/u_memory.h" #include "util/u_format_s3tc.h" @@ -135,12 +136,10 @@ static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags) void r600_preflush_suspend_features(struct r600_common_context *ctx) { /* suspend queries */ - ctx->queries_suspended_for_flush = false; - if (ctx->num_cs_dw_nontimer_queries_suspend) { + if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries)) r600_suspend_nontimer_queries(ctx); + if (!LIST_IS_EMPTY(&ctx->active_timer_queries)) r600_suspend_timer_queries(ctx); - ctx->queries_suspended_for_flush = true; - } ctx->streamout.suspended = false; if (ctx->streamout.begin_emitted) { @@ -157,10 +156,10 @@ void r600_postflush_resume_features(struct r600_common_context *ctx) } /* resume queries */ - if 
(ctx->queries_suspended_for_flush) { - r600_resume_nontimer_queries(ctx); + if (!LIST_IS_EMPTY(&ctx->active_timer_queries)) r600_resume_timer_queries(ctx); - } + if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries)) + r600_resume_nontimer_queries(ctx); } static void r600_flush_from_st(struct pipe_context *ctx, @@ -718,50 +717,6 @@ static uint64_t r600_get_timestamp(struct pipe_screen *screen) rscreen->info.r600_clock_crystal_freq; } -static int r600_get_driver_query_info(struct pipe_screen *screen, - unsigned index, - struct pipe_driver_query_info *info) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct pipe_driver_query_info list[] = { - {"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64, - PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE}, - {"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64, - PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE}, - {"draw-calls", R600_QUERY_DRAW_CALLS, {0}}, - {"requested-VRAM", R600_QUERY_REQUESTED_VRAM, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES}, - {"requested-GTT", R600_QUERY_REQUESTED_GTT, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES}, - {"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, PIPE_DRIVER_QUERY_TYPE_MICROSECONDS, - PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE}, - {"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}}, - {"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES, - PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE}, - {"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES}, - {"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES}, - {"GPU-load", R600_QUERY_GPU_LOAD, {100}}, - {"temperature", R600_QUERY_GPU_TEMPERATURE, {125}}, - {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ}, - {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ}, - }; - unsigned 
num_queries; - - if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) - num_queries = Elements(list); - else if (rscreen->info.drm_major == 3) - num_queries = Elements(list) - 3; - else - num_queries = Elements(list) - 4; - - if (!info) - return num_queries; - - if (index >= num_queries) - return 0; - - *info = list[index]; - return 1; -} - static void r600_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **dst, struct pipe_fence_handle *src) @@ -949,7 +904,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, rscreen->b.get_device_vendor = r600_get_device_vendor; rscreen->b.get_compute_param = r600_get_compute_param; rscreen->b.get_paramf = r600_get_paramf; - rscreen->b.get_driver_query_info = r600_get_driver_query_info; rscreen->b.get_timestamp = r600_get_timestamp; rscreen->b.fence_finish = r600_fence_finish; rscreen->b.fence_reference = r600_fence_reference; @@ -965,6 +919,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, } r600_init_screen_texture_functions(rscreen); + r600_init_screen_query_functions(rscreen); rscreen->ws = ws; rscreen->family = rscreen->info.family; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index ebe633b9125..fbdc5c410ae 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -47,21 +47,6 @@ #define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) #define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) -#define R600_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0) -#define R600_QUERY_REQUESTED_VRAM (PIPE_QUERY_DRIVER_SPECIFIC + 1) -#define R600_QUERY_REQUESTED_GTT (PIPE_QUERY_DRIVER_SPECIFIC + 2) -#define R600_QUERY_BUFFER_WAIT_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 3) -#define R600_QUERY_NUM_CS_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 4) -#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5) -#define 
R600_QUERY_VRAM_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 6) -#define R600_QUERY_GTT_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 7) -#define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8) -#define R600_QUERY_CURRENT_GPU_SCLK (PIPE_QUERY_DRIVER_SPECIFIC + 9) -#define R600_QUERY_CURRENT_GPU_MCLK (PIPE_QUERY_DRIVER_SPECIFIC + 10) -#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11) -#define R600_QUERY_NUM_COMPILATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 12) -#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13) - #define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0) #define R600_CONTEXT_PRIVATE_FLAG (1u << 1) @@ -408,8 +393,6 @@ struct r600_common_context { struct list_head active_timer_queries; unsigned num_cs_dw_nontimer_queries_suspend; unsigned num_cs_dw_timer_queries_suspend; - /* If queries have been suspended. */ - bool queries_suspended_for_flush; /* Additional hardware info. */ unsigned backend_mask; unsigned max_db; /* for OQ */ @@ -526,6 +509,7 @@ uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen); unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin); /* r600_query.c */ +void r600_init_screen_query_functions(struct r600_common_screen *rscreen); void r600_query_init(struct r600_common_context *rctx); void r600_suspend_nontimer_queries(struct r600_common_context *ctx); void r600_resume_nontimer_queries(struct r600_common_context *ctx); diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index 8c2b601a96c..b1cfb6e462b 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -22,81 +22,218 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "r600_query.h" #include "r600_cs.h" #include "util/u_memory.h" +/* Queries without buffer handling or suspend/resume. */ +struct r600_query_sw { + struct r600_query b; -struct r600_query_buffer { - /* The buffer where query results are stored. 
*/ - struct r600_resource *buf; - /* Offset of the next free result after current query data */ - unsigned results_end; - /* If a query buffer is full, a new buffer is created and the old one - * is put in here. When we calculate the result, we sum up the samples - * from all buffers. */ - struct r600_query_buffer *previous; -}; - -struct r600_query { - /* The query buffer and how many results are in it. */ - struct r600_query_buffer buffer; - /* The type of query */ - unsigned type; - /* Size of the result in memory for both begin_query and end_query, - * this can be one or two numbers, or it could even be a size of a structure. */ - unsigned result_size; - /* The number of dwords for begin_query or end_query. */ - unsigned num_cs_dw; - /* linked list of queries */ - struct list_head list; - /* for custom non-GPU queries */ uint64_t begin_result; uint64_t end_result; /* Fence for GPU_FINISHED. */ struct pipe_fence_handle *fence; - /* For transform feedback: which stream the query is for */ - unsigned stream; }; - -static bool r600_is_timer_query(unsigned type) +static void r600_query_sw_destroy(struct r600_common_context *rctx, + struct r600_query *rquery) { - return type == PIPE_QUERY_TIME_ELAPSED || - type == PIPE_QUERY_TIMESTAMP; + struct pipe_screen *screen = rctx->b.screen; + struct r600_query_sw *query = (struct r600_query_sw *)rquery; + + screen->fence_reference(screen, &query->fence, NULL); + FREE(query); } -static bool r600_query_needs_begin(unsigned type) +static enum radeon_value_id winsys_id_from_type(unsigned type) { - return type != PIPE_QUERY_GPU_FINISHED && - type != PIPE_QUERY_TIMESTAMP; + switch (type) { + case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY; + case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY; + case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS; + case R600_QUERY_NUM_CS_FLUSHES: return RADEON_NUM_CS_FLUSHES; + case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED; + case 
R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE; + case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE; + case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE; + case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK; + case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK; + default: unreachable("query type does not correspond to winsys id"); + } } -static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, unsigned type) +static boolean r600_query_sw_begin(struct r600_common_context *rctx, + struct r600_query *rquery) { - unsigned j, i, num_results, buf_size = 4096; - uint32_t *results; + struct r600_query_sw *query = (struct r600_query_sw *)rquery; - /* Non-GPU queries. */ - switch (type) { + switch(query->b.type) { case PIPE_QUERY_TIMESTAMP_DISJOINT: case PIPE_QUERY_GPU_FINISHED: + break; case R600_QUERY_DRAW_CALLS: + query->begin_result = rctx->num_draw_calls; + break; case R600_QUERY_REQUESTED_VRAM: case R600_QUERY_REQUESTED_GTT: + case R600_QUERY_VRAM_USAGE: + case R600_QUERY_GTT_USAGE: + case R600_QUERY_GPU_TEMPERATURE: + case R600_QUERY_CURRENT_GPU_SCLK: + case R600_QUERY_CURRENT_GPU_MCLK: + query->begin_result = 0; + break; case R600_QUERY_BUFFER_WAIT_TIME: case R600_QUERY_NUM_CS_FLUSHES: - case R600_QUERY_NUM_BYTES_MOVED: + case R600_QUERY_NUM_BYTES_MOVED: { + enum radeon_value_id ws_id = winsys_id_from_type(query->b.type); + query->begin_result = rctx->ws->query_value(rctx->ws, ws_id); + break; + } + case R600_QUERY_GPU_LOAD: + query->begin_result = r600_gpu_load_begin(rctx->screen); + break; + case R600_QUERY_NUM_COMPILATIONS: + query->begin_result = p_atomic_read(&rctx->screen->num_compilations); + break; + case R600_QUERY_NUM_SHADERS_CREATED: + query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created); + break; + default: + unreachable("r600_query_sw_begin: bad query type"); + } + + return TRUE; +} + +static void r600_query_sw_end(struct r600_common_context *rctx, + struct r600_query 
*rquery) +{ + struct r600_query_sw *query = (struct r600_query_sw *)rquery; + + switch(query->b.type) { + case PIPE_QUERY_TIMESTAMP_DISJOINT: + break; + case PIPE_QUERY_GPU_FINISHED: + rctx->b.flush(&rctx->b, &query->fence, 0); + break; + case R600_QUERY_DRAW_CALLS: + query->end_result = rctx->num_draw_calls; /* end sample; get_result() reports end_result - begin_result */ + break; + case R600_QUERY_REQUESTED_VRAM: + case R600_QUERY_REQUESTED_GTT: case R600_QUERY_VRAM_USAGE: case R600_QUERY_GTT_USAGE: case R600_QUERY_GPU_TEMPERATURE: case R600_QUERY_CURRENT_GPU_SCLK: case R600_QUERY_CURRENT_GPU_MCLK: + case R600_QUERY_BUFFER_WAIT_TIME: + case R600_QUERY_NUM_CS_FLUSHES: + case R600_QUERY_NUM_BYTES_MOVED: { + enum radeon_value_id ws_id = winsys_id_from_type(query->b.type); + query->end_result = rctx->ws->query_value(rctx->ws, ws_id); + break; + } case R600_QUERY_GPU_LOAD: + query->end_result = r600_gpu_load_end(rctx->screen, + query->begin_result); + query->begin_result = 0; + break; case R600_QUERY_NUM_COMPILATIONS: + query->end_result = p_atomic_read(&rctx->screen->num_compilations); + break; case R600_QUERY_NUM_SHADERS_CREATED: + query->end_result = p_atomic_read(&rctx->screen->num_shaders_created); + break; + default: + unreachable("r600_query_sw_end: bad query type"); + } +} + +static boolean r600_query_sw_get_result(struct r600_common_context *rctx, + struct r600_query *rquery, + boolean wait, + union pipe_query_result *result) +{ + struct r600_query_sw *query = (struct r600_query_sw *)rquery; + + switch (query->b.type) { + case PIPE_QUERY_TIMESTAMP_DISJOINT: + /* Convert from cycles per millisecond to cycles per second (Hz). */ + result->timestamp_disjoint.frequency = + (uint64_t)rctx->screen->info.r600_clock_crystal_freq * 1000; + result->timestamp_disjoint.disjoint = FALSE; + return TRUE; + case PIPE_QUERY_GPU_FINISHED: { + struct pipe_screen *screen = rctx->b.screen; + result->b = screen->fence_finish(screen, query->fence, + wait ?
PIPE_TIMEOUT_INFINITE : 0); + return result->b; + } + } + + result->u64 = query->end_result - query->begin_result; + + switch (query->b.type) { + case R600_QUERY_BUFFER_WAIT_TIME: + case R600_QUERY_GPU_TEMPERATURE: + result->u64 /= 1000; + break; + case R600_QUERY_CURRENT_GPU_SCLK: + case R600_QUERY_CURRENT_GPU_MCLK: + result->u64 *= 1000000; + break; + } + + return TRUE; +} + +static struct r600_query_ops sw_query_ops = { + .destroy = r600_query_sw_destroy, + .begin = r600_query_sw_begin, + .end = r600_query_sw_end, + .get_result = r600_query_sw_get_result +}; + +static struct pipe_query *r600_query_sw_create(struct pipe_context *ctx, + unsigned query_type) +{ + struct r600_query_sw *query; + + query = CALLOC_STRUCT(r600_query_sw); + if (query == NULL) return NULL; + + query->b.type = query_type; + query->b.ops = &sw_query_ops; + + return (struct pipe_query *)query; +} + +void r600_query_hw_destroy(struct r600_common_context *rctx, + struct r600_query *rquery) +{ + struct r600_query_hw *query = (struct r600_query_hw *)rquery; + struct r600_query_buffer *prev = query->buffer.previous; + + /* Release all query buffers. */ + while (prev) { + struct r600_query_buffer *qbuf = prev; + prev = prev->previous; + pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL); + FREE(qbuf); } + pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL); + FREE(rquery); +} + +static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, + struct r600_query_hw *query) +{ + unsigned buf_size = 4096; + /* Queries are normally read by the CPU after * being written by the gpu, hence staging is probably a good * usage pattern. 
@@ -105,14 +242,30 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING, buf_size); - switch (type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE); - memset(results, 0, buf_size); + if (query->flags & R600_QUERY_HW_FLAG_PREDICATE) + query->ops->prepare_buffer(ctx, query, buf); + + return buf; +} + +static void r600_query_hw_prepare_buffer(struct r600_common_context *ctx, + struct r600_query_hw *query, + struct r600_resource *buffer) +{ + /* Callers ensure that the buffer is currently unused by the GPU. */ + uint32_t *results = ctx->ws->buffer_map(buffer->cs_buf, NULL, + PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_UNSYNCHRONIZED); + + memset(results, 0, buffer->b.b.width0); + + if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER || + query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) { + unsigned num_results; + unsigned i, j; /* Set top bits for unused backends. 
*/ - num_results = buf_size / (16 * ctx->max_db); + num_results = buffer->b.b.width0 / (16 * ctx->max_db); for (j = 0; j < num_results; j++) { for (i = 0; i < ctx->max_db; i++) { if (!(ctx->backend_mask & (1<<i))) { @@ -122,22 +275,109 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c } results += 4 * ctx->max_db; } + } +} + +static struct r600_query_ops query_hw_ops = { + .destroy = r600_query_hw_destroy, + .begin = r600_query_hw_begin, + .end = r600_query_hw_end, + .get_result = r600_query_hw_get_result, +}; + +static void r600_query_hw_do_emit_start(struct r600_common_context *ctx, + struct r600_query_hw *query, + struct r600_resource *buffer, + uint64_t va); +static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, + struct r600_query_hw *query, + struct r600_resource *buffer, + uint64_t va); +static void r600_query_hw_add_result(struct r600_common_context *ctx, + struct r600_query_hw *, void *buffer, + union pipe_query_result *result); +static void r600_query_hw_clear_result(struct r600_query_hw *, + union pipe_query_result *); + +static struct r600_query_hw_ops query_hw_default_hw_ops = { + .prepare_buffer = r600_query_hw_prepare_buffer, + .emit_start = r600_query_hw_do_emit_start, + .emit_stop = r600_query_hw_do_emit_stop, + .clear_result = r600_query_hw_clear_result, + .add_result = r600_query_hw_add_result, +}; + +boolean r600_query_hw_init(struct r600_common_context *rctx, + struct r600_query_hw *query) +{ + query->buffer.buf = r600_new_query_buffer(rctx, query); + if (!query->buffer.buf) + return FALSE; + + return TRUE; +} + +static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx, + unsigned query_type, + unsigned index) +{ + struct r600_query_hw *query = CALLOC_STRUCT(r600_query_hw); + if (!query) + return NULL; + + query->b.type = query_type; + query->b.ops = &query_hw_ops; + query->ops = &query_hw_default_hw_ops; + + switch (query_type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + 
case PIPE_QUERY_OCCLUSION_PREDICATE: + query->result_size = 16 * rctx->max_db; + query->num_cs_dw_begin = 6; + query->num_cs_dw_end = 6; + query->flags |= R600_QUERY_HW_FLAG_PREDICATE; break; case PIPE_QUERY_TIME_ELAPSED: + query->result_size = 16; + query->num_cs_dw_begin = 8; + query->num_cs_dw_end = 8; + query->flags = R600_QUERY_HW_FLAG_TIMER; + break; case PIPE_QUERY_TIMESTAMP: + query->result_size = 8; + query->num_cs_dw_end = 8; + query->flags = R600_QUERY_HW_FLAG_TIMER | + R600_QUERY_HW_FLAG_NO_START; break; case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_SO_STATISTICS: case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ + query->result_size = 32; + query->num_cs_dw_begin = 6; + query->num_cs_dw_end = 6; + query->stream = index; + query->flags |= R600_QUERY_HW_FLAG_PREDICATE; + break; case PIPE_QUERY_PIPELINE_STATISTICS: - results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE); - memset(results, 0, buf_size); + /* 11 values on EG, 8 on R600. */ + query->result_size = (rctx->chip_class >= EVERGREEN ? 
11 : 8) * 16; + query->num_cs_dw_begin = 6; + query->num_cs_dw_end = 6; break; default: assert(0); + FREE(query); + return NULL; } - return buf; + + if (!r600_query_hw_init(rctx, query)) { + FREE(query); + return NULL; + } + + return (struct pipe_query *)query; } static void r600_update_occlusion_query_state(struct r600_common_context *rctx, @@ -159,7 +399,7 @@ static void r600_update_occlusion_query_state(struct r600_common_context *rctx, } } -static unsigned event_type_for_stream(struct r600_query *query) +static unsigned event_type_for_stream(struct r600_query_hw *query) { switch (query->stream) { default: @@ -170,28 +410,14 @@ static unsigned event_type_for_stream(struct r600_query *query) } } -static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_query *query) +static void r600_query_hw_do_emit_start(struct r600_common_context *ctx, + struct r600_query_hw *query, + struct r600_resource *buffer, + uint64_t va) { struct radeon_winsys_cs *cs = ctx->gfx.cs; - uint64_t va; - - r600_update_occlusion_query_state(ctx, query->type, 1); - r600_update_prims_generated_query_state(ctx, query->type, 1); - ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw * 2, TRUE); - - /* Get a new query buffer if needed. 
*/ - if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) { - struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer); - *qbuf = query->buffer; - query->buffer.buf = r600_new_query_buffer(ctx, query->type); - query->buffer.results_end = 0; - query->buffer.previous = qbuf; - } - - /* emit begin query */ - va = query->buffer.buf->gpu_address + query->buffer.results_end; - switch (query->type) { + switch (query->b.type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); @@ -227,30 +453,50 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q } r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); - - if (r600_is_timer_query(query->type)) - ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw; - else - ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw; } -static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_query *query) +static void r600_query_hw_emit_start(struct r600_common_context *ctx, + struct r600_query_hw *query) { - struct radeon_winsys_cs *cs = ctx->gfx.cs; uint64_t va; - /* The queries which need begin already called this in begin_query. */ - if (!r600_query_needs_begin(query->type)) { - ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw, FALSE); + r600_update_occlusion_query_state(ctx, query->b.type, 1); + r600_update_prims_generated_query_state(ctx, query->b.type, 1); + + ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end, + TRUE); + + /* Get a new query buffer if needed. 
*/ + if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) { + struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer); + *qbuf = query->buffer; + query->buffer.buf = r600_new_query_buffer(ctx, query); + query->buffer.results_end = 0; + query->buffer.previous = qbuf; } - va = query->buffer.buf->gpu_address; + /* emit begin query */ + va = query->buffer.buf->gpu_address + query->buffer.results_end; + + query->ops->emit_start(ctx, query, query->buffer.buf, va); - /* emit end query */ - switch (query->type) { + if (query->flags & R600_QUERY_HW_FLAG_TIMER) + ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw_end; + else + ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw_end; +} + +static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, + struct r600_query_hw *query, + struct r600_resource *buffer, + uint64_t va) +{ + struct radeon_winsys_cs *cs = ctx->gfx.cs; + + switch (query->b.type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: - va += query->buffer.results_end + 8; + va += 8; radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); radeon_emit(cs, va); @@ -260,14 +506,14 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_SO_STATISTICS: case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - va += query->buffer.results_end + query->result_size/2; + va += query->result_size/2; radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3)); radeon_emit(cs, va); radeon_emit(cs, (va >> 32) & 0xFFFF); break; case PIPE_QUERY_TIME_ELAPSED: - va += query->buffer.results_end + query->result_size/2; + va += query->result_size/2; /* fall through */ case PIPE_QUERY_TIMESTAMP: radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); @@ -278,7 +524,7 @@ static void r600_emit_query_end(struct 
r600_common_context *ctx, struct r600_que radeon_emit(cs, 0); break; case PIPE_QUERY_PIPELINE_STATISTICS: - va += query->buffer.results_end + query->result_size/2; + va += query->result_size/2; radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); radeon_emit(cs, va); @@ -289,25 +535,41 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que } r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); +} + +static void r600_query_hw_emit_stop(struct r600_common_context *ctx, + struct r600_query_hw *query) +{ + uint64_t va; + + /* The queries which need begin already called this in begin_query. */ + if (query->flags & R600_QUERY_HW_FLAG_NO_START) { + ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, FALSE); + } + + /* emit end query */ + va = query->buffer.buf->gpu_address + query->buffer.results_end; + + query->ops->emit_stop(ctx, query, query->buffer.buf, va); query->buffer.results_end += query->result_size; - if (r600_query_needs_begin(query->type)) { - if (r600_is_timer_query(query->type)) - ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw; + if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) { + if (query->flags & R600_QUERY_HW_FLAG_TIMER) + ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw_end; else - ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw; + ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw_end; } - r600_update_occlusion_query_state(ctx, query->type, -1); - r600_update_prims_generated_query_state(ctx, query->type, -1); + r600_update_occlusion_query_state(ctx, query->b.type, -1); + r600_update_prims_generated_query_state(ctx, query->b.type, -1); } static void r600_emit_query_predication(struct r600_common_context *ctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = ctx->gfx.cs; - struct r600_query *query = (struct r600_query*)ctx->render_cond; + struct r600_query_hw *query 
= (struct r600_query_hw *)ctx->render_cond; struct r600_query_buffer *qbuf; uint32_t op; bool flag_wait; @@ -318,7 +580,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT || ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT; - switch (query->type) { + switch (query->b.type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: op = PRED_OP(PREDICATION_OP_ZPASS); @@ -364,94 +626,21 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) { struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct r600_query *query; - bool skip_allocation = false; - query = CALLOC_STRUCT(r600_query); - if (query == NULL) - return NULL; - - query->type = query_type; - - switch (query_type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - query->result_size = 16 * rctx->max_db; - query->num_cs_dw = 6; - break; - break; - case PIPE_QUERY_TIME_ELAPSED: - query->result_size = 16; - query->num_cs_dw = 8; - break; - case PIPE_QUERY_TIMESTAMP: - query->result_size = 8; - query->num_cs_dw = 8; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ - query->result_size = 32; - query->num_cs_dw = 6; - query->stream = index; - break; - case PIPE_QUERY_PIPELINE_STATISTICS: - /* 11 values on EG, 8 on R600. */ - query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16; - query->num_cs_dw = 6; - break; - /* Non-GPU queries and queries not requiring a buffer. 
*/ - case PIPE_QUERY_TIMESTAMP_DISJOINT: - case PIPE_QUERY_GPU_FINISHED: - case R600_QUERY_DRAW_CALLS: - case R600_QUERY_REQUESTED_VRAM: - case R600_QUERY_REQUESTED_GTT: - case R600_QUERY_BUFFER_WAIT_TIME: - case R600_QUERY_NUM_CS_FLUSHES: - case R600_QUERY_NUM_BYTES_MOVED: - case R600_QUERY_VRAM_USAGE: - case R600_QUERY_GTT_USAGE: - case R600_QUERY_GPU_TEMPERATURE: - case R600_QUERY_CURRENT_GPU_SCLK: - case R600_QUERY_CURRENT_GPU_MCLK: - case R600_QUERY_GPU_LOAD: - case R600_QUERY_NUM_COMPILATIONS: - case R600_QUERY_NUM_SHADERS_CREATED: - skip_allocation = true; - break; - default: - assert(0); - FREE(query); - return NULL; - } + if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT || + query_type == PIPE_QUERY_GPU_FINISHED || + query_type >= PIPE_QUERY_DRIVER_SPECIFIC) + return r600_query_sw_create(ctx, query_type); - if (!skip_allocation) { - query->buffer.buf = r600_new_query_buffer(rctx, query_type); - if (!query->buffer.buf) { - FREE(query); - return NULL; - } - } - return (struct pipe_query*)query; + return r600_query_hw_create(rctx, query_type, index); } static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query) { - struct r600_query *rquery = (struct r600_query*)query; - struct r600_query_buffer *prev = rquery->buffer.previous; - - /* Release all query buffers. 
*/ - while (prev) { - struct r600_query_buffer *qbuf = prev; - prev = prev->previous; - pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL); - FREE(qbuf); - } + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; - pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL); - FREE(query); + rquery->ops->destroy(rctx, rquery); } static boolean r600_begin_query(struct pipe_context *ctx, @@ -459,48 +648,14 @@ static boolean r600_begin_query(struct pipe_context *ctx, { struct r600_common_context *rctx = (struct r600_common_context *)ctx; struct r600_query *rquery = (struct r600_query *)query; - struct r600_query_buffer *prev = rquery->buffer.previous; - if (!r600_query_needs_begin(rquery->type)) { - assert(0); - return false; - } + return rquery->ops->begin(rctx, rquery); +} - /* Non-GPU queries. */ - switch (rquery->type) { - case PIPE_QUERY_TIMESTAMP_DISJOINT: - return true; - case R600_QUERY_DRAW_CALLS: - rquery->begin_result = rctx->num_draw_calls; - return true; - case R600_QUERY_REQUESTED_VRAM: - case R600_QUERY_REQUESTED_GTT: - case R600_QUERY_VRAM_USAGE: - case R600_QUERY_GTT_USAGE: - case R600_QUERY_GPU_TEMPERATURE: - case R600_QUERY_CURRENT_GPU_SCLK: - case R600_QUERY_CURRENT_GPU_MCLK: - rquery->begin_result = 0; - return true; - case R600_QUERY_BUFFER_WAIT_TIME: - rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000; - return true; - case R600_QUERY_NUM_CS_FLUSHES: - rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES); - return true; - case R600_QUERY_NUM_BYTES_MOVED: - rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED); - return true; - case R600_QUERY_GPU_LOAD: - rquery->begin_result = r600_gpu_load_begin(rctx->screen); - return true; - case R600_QUERY_NUM_COMPILATIONS: - rquery->begin_result = p_atomic_read(&rctx->screen->num_compilations); - return true; - case 
R600_QUERY_NUM_SHADERS_CREATED: - rquery->begin_result = p_atomic_read(&rctx->screen->num_shaders_created); - return true; - } +static void r600_query_hw_reset_buffers(struct r600_common_context *rctx, + struct r600_query_hw *query) +{ + struct r600_query_buffer *prev = query->buffer.previous; /* Discard the old query buffers. */ while (prev) { @@ -510,22 +665,39 @@ static boolean r600_begin_query(struct pipe_context *ctx, FREE(qbuf); } - /* Obtain a new buffer if the current one can't be mapped without a stall. */ - if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) || - !rctx->ws->buffer_wait(rquery->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) { - pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL); - rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type); + if (query->flags & R600_QUERY_HW_FLAG_PREDICATE) { + /* Obtain a new buffer if the current one can't be mapped without a stall. */ + if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) || + !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) { + pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL); + query->buffer.buf = r600_new_query_buffer(rctx, query); + } else { + query->ops->prepare_buffer(rctx, query, query->buffer.buf); + } } - rquery->buffer.results_end = 0; - rquery->buffer.previous = NULL; + query->buffer.results_end = 0; + query->buffer.previous = NULL; +} - r600_emit_query_begin(rctx, rquery); +boolean r600_query_hw_begin(struct r600_common_context *rctx, + struct r600_query *rquery) +{ + struct r600_query_hw *query = (struct r600_query_hw *)rquery; - if (r600_is_timer_query(rquery->type)) - LIST_ADDTAIL(&rquery->list, &rctx->active_timer_queries); + if (query->flags & R600_QUERY_HW_FLAG_NO_START) { + assert(0); + return false; + } + + r600_query_hw_reset_buffers(rctx, query); + + r600_query_hw_emit_start(rctx, query); + + if (query->flags 
& R600_QUERY_HW_FLAG_TIMER) + LIST_ADDTAIL(&query->list, &rctx->active_timer_queries); else - LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries); + LIST_ADDTAIL(&query->list, &rctx->active_nontimer_queries); return true; } @@ -534,64 +706,24 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query) struct r600_common_context *rctx = (struct r600_common_context *)ctx; struct r600_query *rquery = (struct r600_query *)query; - /* Non-GPU queries. */ - switch (rquery->type) { - case PIPE_QUERY_TIMESTAMP_DISJOINT: - return; - case PIPE_QUERY_GPU_FINISHED: - ctx->flush(ctx, &rquery->fence, 0); - return; - case R600_QUERY_DRAW_CALLS: - rquery->end_result = rctx->num_draw_calls; - return; - case R600_QUERY_REQUESTED_VRAM: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_VRAM_MEMORY); - return; - case R600_QUERY_REQUESTED_GTT: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY); - return; - case R600_QUERY_BUFFER_WAIT_TIME: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000; - return; - case R600_QUERY_NUM_CS_FLUSHES: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES); - return; - case R600_QUERY_NUM_BYTES_MOVED: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED); - return; - case R600_QUERY_VRAM_USAGE: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_VRAM_USAGE); - return; - case R600_QUERY_GTT_USAGE: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GTT_USAGE); - return; - case R600_QUERY_GPU_TEMPERATURE: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GPU_TEMPERATURE) / 1000; - return; - case R600_QUERY_CURRENT_GPU_SCLK: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_SCLK) * 1000000; - return; - case R600_QUERY_CURRENT_GPU_MCLK: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_MCLK) * 1000000; - return; - case 
R600_QUERY_GPU_LOAD: - rquery->end_result = r600_gpu_load_end(rctx->screen, rquery->begin_result); - return; - case R600_QUERY_NUM_COMPILATIONS: - rquery->end_result = p_atomic_read(&rctx->screen->num_compilations); - return; - case R600_QUERY_NUM_SHADERS_CREATED: - rquery->end_result = p_atomic_read(&rctx->screen->num_shaders_created); - return; - } + rquery->ops->end(rctx, rquery); +} - r600_emit_query_end(rctx, rquery); +void r600_query_hw_end(struct r600_common_context *rctx, + struct r600_query *rquery) +{ + struct r600_query_hw *query = (struct r600_query_hw *)rquery; + + if (query->flags & R600_QUERY_HW_FLAG_NO_START) + r600_query_hw_reset_buffers(rctx, query); - if (r600_query_needs_begin(rquery->type)) - LIST_DELINIT(&rquery->list); + r600_query_hw_emit_stop(rctx, query); + + if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) + LIST_DELINIT(&query->list); } -static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index, +static unsigned r600_query_read_result(void *map, unsigned start_index, unsigned end_index, bool test_status_bit) { uint32_t *current_result = (uint32_t*)map; @@ -609,80 +741,36 @@ static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned return 0; } -static boolean r600_get_query_buffer_result(struct r600_common_context *ctx, - struct r600_query *query, - struct r600_query_buffer *qbuf, - boolean wait, - union pipe_query_result *result) +static void r600_query_hw_add_result(struct r600_common_context *ctx, + struct r600_query_hw *query, + void *buffer, + union pipe_query_result *result) { - struct pipe_screen *screen = ctx->b.screen; - unsigned results_base = 0; - char *map; - - /* Non-GPU queries. */ - switch (query->type) { - case PIPE_QUERY_TIMESTAMP_DISJOINT: - /* Convert from cycles per millisecond to cycles per second (Hz). 
*/ - result->timestamp_disjoint.frequency = - (uint64_t)ctx->screen->info.r600_clock_crystal_freq * 1000; - result->timestamp_disjoint.disjoint = FALSE; - return TRUE; - case PIPE_QUERY_GPU_FINISHED: - result->b = screen->fence_finish(screen, query->fence, - wait ? PIPE_TIMEOUT_INFINITE : 0); - return result->b; - case R600_QUERY_DRAW_CALLS: - case R600_QUERY_REQUESTED_VRAM: - case R600_QUERY_REQUESTED_GTT: - case R600_QUERY_BUFFER_WAIT_TIME: - case R600_QUERY_NUM_CS_FLUSHES: - case R600_QUERY_NUM_BYTES_MOVED: - case R600_QUERY_VRAM_USAGE: - case R600_QUERY_GTT_USAGE: - case R600_QUERY_GPU_TEMPERATURE: - case R600_QUERY_CURRENT_GPU_SCLK: - case R600_QUERY_CURRENT_GPU_MCLK: - case R600_QUERY_NUM_COMPILATIONS: - case R600_QUERY_NUM_SHADERS_CREATED: - result->u64 = query->end_result - query->begin_result; - return TRUE; - case R600_QUERY_GPU_LOAD: - result->u64 = query->end_result; - return TRUE; - } - - map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf, - PIPE_TRANSFER_READ | - (wait ? 
0 : PIPE_TRANSFER_DONTBLOCK)); - if (!map) - return FALSE; - - /* count all results across all data blocks */ - switch (query->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - while (results_base != qbuf->results_end) { + switch (query->b.type) { + case PIPE_QUERY_OCCLUSION_COUNTER: { + unsigned results_base = 0; + while (results_base != query->result_size) { result->u64 += - r600_query_read_result(map + results_base, 0, 2, true); + r600_query_read_result(buffer + results_base, 0, 2, true); results_base += 16; } break; - case PIPE_QUERY_OCCLUSION_PREDICATE: - while (results_base != qbuf->results_end) { + } + case PIPE_QUERY_OCCLUSION_PREDICATE: { + unsigned results_base = 0; + while (results_base != query->result_size) { result->b = result->b || - r600_query_read_result(map + results_base, 0, 2, true) != 0; + r600_query_read_result(buffer + results_base, 0, 2, true) != 0; results_base += 16; } break; + } case PIPE_QUERY_TIME_ELAPSED: - while (results_base != qbuf->results_end) { - result->u64 += - r600_query_read_result(map + results_base, 0, 2, false); - results_base += query->result_size; - } + result->u64 += r600_query_read_result(buffer, 0, 2, false); break; case PIPE_QUERY_TIMESTAMP: { - uint32_t *current_result = (uint32_t*)map; + uint32_t *current_result = (uint32_t*)buffer; result->u64 = (uint64_t)current_result[0] | (uint64_t)current_result[1] << 32; break; @@ -694,84 +782,64 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx, * u64 PrimitiveStorageNeeded; * } * We only need NumPrimitivesWritten here. */ - while (results_base != qbuf->results_end) { - result->u64 += - r600_query_read_result(map + results_base, 2, 6, true); - results_base += query->result_size; - } + result->u64 += r600_query_read_result(buffer, 2, 6, true); break; case PIPE_QUERY_PRIMITIVES_GENERATED: /* Here we read PrimitiveStorageNeeded. 
*/ - while (results_base != qbuf->results_end) { - result->u64 += - r600_query_read_result(map + results_base, 0, 4, true); - results_base += query->result_size; - } + result->u64 += r600_query_read_result(buffer, 0, 4, true); break; case PIPE_QUERY_SO_STATISTICS: - while (results_base != qbuf->results_end) { - result->so_statistics.num_primitives_written += - r600_query_read_result(map + results_base, 2, 6, true); - result->so_statistics.primitives_storage_needed += - r600_query_read_result(map + results_base, 0, 4, true); - results_base += query->result_size; - } + result->so_statistics.num_primitives_written += + r600_query_read_result(buffer, 2, 6, true); + result->so_statistics.primitives_storage_needed += + r600_query_read_result(buffer, 0, 4, true); break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - while (results_base != qbuf->results_end) { - result->b = result->b || - r600_query_read_result(map + results_base, 2, 6, true) != - r600_query_read_result(map + results_base, 0, 4, true); - results_base += query->result_size; - } + result->b = result->b || + r600_query_read_result(buffer, 2, 6, true) != + r600_query_read_result(buffer, 0, 4, true); break; case PIPE_QUERY_PIPELINE_STATISTICS: if (ctx->chip_class >= EVERGREEN) { - while (results_base != qbuf->results_end) { - result->pipeline_statistics.ps_invocations += - r600_query_read_result(map + results_base, 0, 22, false); - result->pipeline_statistics.c_primitives += - r600_query_read_result(map + results_base, 2, 24, false); - result->pipeline_statistics.c_invocations += - r600_query_read_result(map + results_base, 4, 26, false); - result->pipeline_statistics.vs_invocations += - r600_query_read_result(map + results_base, 6, 28, false); - result->pipeline_statistics.gs_invocations += - r600_query_read_result(map + results_base, 8, 30, false); - result->pipeline_statistics.gs_primitives += - r600_query_read_result(map + results_base, 10, 32, false); - result->pipeline_statistics.ia_primitives += - 
r600_query_read_result(map + results_base, 12, 34, false); - result->pipeline_statistics.ia_vertices += - r600_query_read_result(map + results_base, 14, 36, false); - result->pipeline_statistics.hs_invocations += - r600_query_read_result(map + results_base, 16, 38, false); - result->pipeline_statistics.ds_invocations += - r600_query_read_result(map + results_base, 18, 40, false); - result->pipeline_statistics.cs_invocations += - r600_query_read_result(map + results_base, 20, 42, false); - results_base += query->result_size; - } + result->pipeline_statistics.ps_invocations += + r600_query_read_result(buffer, 0, 22, false); + result->pipeline_statistics.c_primitives += + r600_query_read_result(buffer, 2, 24, false); + result->pipeline_statistics.c_invocations += + r600_query_read_result(buffer, 4, 26, false); + result->pipeline_statistics.vs_invocations += + r600_query_read_result(buffer, 6, 28, false); + result->pipeline_statistics.gs_invocations += + r600_query_read_result(buffer, 8, 30, false); + result->pipeline_statistics.gs_primitives += + r600_query_read_result(buffer, 10, 32, false); + result->pipeline_statistics.ia_primitives += + r600_query_read_result(buffer, 12, 34, false); + result->pipeline_statistics.ia_vertices += + r600_query_read_result(buffer, 14, 36, false); + result->pipeline_statistics.hs_invocations += + r600_query_read_result(buffer, 16, 38, false); + result->pipeline_statistics.ds_invocations += + r600_query_read_result(buffer, 18, 40, false); + result->pipeline_statistics.cs_invocations += + r600_query_read_result(buffer, 20, 42, false); } else { - while (results_base != qbuf->results_end) { - result->pipeline_statistics.ps_invocations += - r600_query_read_result(map + results_base, 0, 16, false); - result->pipeline_statistics.c_primitives += - r600_query_read_result(map + results_base, 2, 18, false); - result->pipeline_statistics.c_invocations += - r600_query_read_result(map + results_base, 4, 20, false); - 
result->pipeline_statistics.vs_invocations += - r600_query_read_result(map + results_base, 6, 22, false); - result->pipeline_statistics.gs_invocations += - r600_query_read_result(map + results_base, 8, 24, false); - result->pipeline_statistics.gs_primitives += - r600_query_read_result(map + results_base, 10, 26, false); - result->pipeline_statistics.ia_primitives += - r600_query_read_result(map + results_base, 12, 28, false); - result->pipeline_statistics.ia_vertices += - r600_query_read_result(map + results_base, 14, 30, false); - results_base += query->result_size; - } + result->pipeline_statistics.ps_invocations += + r600_query_read_result(buffer, 0, 16, false); + result->pipeline_statistics.c_primitives += + r600_query_read_result(buffer, 2, 18, false); + result->pipeline_statistics.c_invocations += + r600_query_read_result(buffer, 4, 20, false); + result->pipeline_statistics.vs_invocations += + r600_query_read_result(buffer, 6, 22, false); + result->pipeline_statistics.gs_invocations += + r600_query_read_result(buffer, 8, 24, false); + result->pipeline_statistics.gs_primitives += + r600_query_read_result(buffer, 10, 26, false); + result->pipeline_statistics.ia_primitives += + r600_query_read_result(buffer, 12, 28, false); + result->pipeline_statistics.ia_vertices += + r600_query_read_result(buffer, 14, 30, false); } #if 0 /* for testing */ printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, " @@ -793,23 +861,47 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx, default: assert(0); } - - return TRUE; } static boolean r600_get_query_result(struct pipe_context *ctx, - struct pipe_query *query, - boolean wait, union pipe_query_result *result) + struct pipe_query *query, boolean wait, + union pipe_query_result *result) { struct r600_common_context *rctx = (struct r600_common_context *)ctx; struct r600_query *rquery = (struct r600_query *)query; + + return rquery->ops->get_result(rctx, rquery, wait, result); +} + 
+static void r600_query_hw_clear_result(struct r600_query_hw *query, + union pipe_query_result *result) +{ + util_query_clear_result(result, query->b.type); +} + +boolean r600_query_hw_get_result(struct r600_common_context *rctx, + struct r600_query *rquery, + boolean wait, union pipe_query_result *result) +{ + struct r600_query_hw *query = (struct r600_query_hw *)rquery; struct r600_query_buffer *qbuf; - util_query_clear_result(result, rquery->type); + query->ops->clear_result(query, result); - for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) { - if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, result)) { + for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { + unsigned results_base = 0; + void *map; + + map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf, + PIPE_TRANSFER_READ | + (wait ? 0 : PIPE_TRANSFER_DONTBLOCK)); + if (!map) return FALSE; + + while (results_base != qbuf->results_end) { + query->ops->add_result(rctx, query, map + results_base, + result); + results_base += query->result_size; } } @@ -827,7 +919,7 @@ static void r600_render_condition(struct pipe_context *ctx, uint mode) { struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct r600_query *rquery = (struct r600_query*)query; + struct r600_query_hw *rquery = (struct r600_query_hw *)query; struct r600_query_buffer *qbuf; struct r600_atom *atom = &rctx->render_cond_atom; @@ -837,8 +929,10 @@ static void r600_render_condition(struct pipe_context *ctx, /* Compute the size of SET_PREDICATION packets. 
*/ atom->num_dw = 0; - for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) - atom->num_dw += (qbuf->results_end / rquery->result_size) * 5; + if (query) { + for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) + atom->num_dw += (qbuf->results_end / rquery->result_size) * 5; + } rctx->set_atom_dirty(rctx, atom, query != NULL); } @@ -847,10 +941,10 @@ static void r600_suspend_queries(struct r600_common_context *ctx, struct list_head *query_list, unsigned *num_cs_dw_queries_suspend) { - struct r600_query *query; + struct r600_query_hw *query; LIST_FOR_EACH_ENTRY(query, query_list, list) { - r600_emit_query_end(ctx, query); + r600_query_hw_emit_stop(ctx, query); } assert(*num_cs_dw_queries_suspend == 0); } @@ -870,19 +964,19 @@ void r600_suspend_timer_queries(struct r600_common_context *ctx) static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx, struct list_head *query_list) { - struct r600_query *query; + struct r600_query_hw *query; unsigned num_dw = 0; LIST_FOR_EACH_ENTRY(query, query_list, list) { /* begin + end */ - num_dw += query->num_cs_dw * 2; + num_dw += query->num_cs_dw_begin + query->num_cs_dw_end; /* Workaround for the fact that * num_cs_dw_nontimer_queries_suspend is incremented for every * resumed query, which raises the bar in need_cs_space for * queries about to be resumed. 
*/ - num_dw += query->num_cs_dw; + num_dw += query->num_cs_dw_end; } /* primitives generated query */ num_dw += ctx->streamout.enable_atom.num_dw; @@ -896,7 +990,7 @@ static void r600_resume_queries(struct r600_common_context *ctx, struct list_head *query_list, unsigned *num_cs_dw_queries_suspend) { - struct r600_query *query; + struct r600_query_hw *query; unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, query_list); assert(*num_cs_dw_queries_suspend == 0); @@ -905,7 +999,7 @@ static void r600_resume_queries(struct r600_common_context *ctx, ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, TRUE); LIST_FOR_EACH_ENTRY(query, query_list, list) { - r600_emit_query_begin(ctx, query); + r600_query_hw_emit_start(ctx, query); } } @@ -1002,6 +1096,76 @@ err: return; } +#define X(name_, query_type_, type_, result_type_) \ + { \ + .name = name_, \ + .query_type = R600_QUERY_##query_type_, \ + .type = PIPE_DRIVER_QUERY_TYPE_##type_, \ + .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \ + .group_id = ~(unsigned)0 \ + } + +static struct pipe_driver_query_info r600_driver_query_list[] = { + X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE), + X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE), + X("draw-calls", DRAW_CALLS, UINT64, CUMULATIVE), + X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE), + X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE), + X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE), + X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, CUMULATIVE), + X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE), + X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE), + X("GTT-usage", GTT_USAGE, BYTES, AVERAGE), + X("GPU-load", GPU_LOAD, UINT64, AVERAGE), + X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE), + X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE), + X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE), +}; + +#undef X + +static unsigned r600_get_num_queries(struct r600_common_screen *rscreen) +{ + if 
(rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) + return Elements(r600_driver_query_list); + else if (rscreen->info.drm_major == 3) + return Elements(r600_driver_query_list) - 3; + else + return Elements(r600_driver_query_list) - 4; +} + +static int r600_get_driver_query_info(struct pipe_screen *screen, + unsigned index, + struct pipe_driver_query_info *info) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + unsigned num_queries = r600_get_num_queries(rscreen); + + if (!info) + return num_queries; + + if (index >= num_queries) + return 0; + + *info = r600_driver_query_list[index]; + + switch (info->query_type) { + case R600_QUERY_REQUESTED_VRAM: + case R600_QUERY_VRAM_USAGE: + info->max_value.u64 = rscreen->info.vram_size; + break; + case R600_QUERY_REQUESTED_GTT: + case R600_QUERY_GTT_USAGE: + info->max_value.u64 = rscreen->info.gart_size; + break; + case R600_QUERY_GPU_TEMPERATURE: + info->max_value.u64 = 125; + break; + } + + return 1; +} + void r600_query_init(struct r600_common_context *rctx) { rctx->b.create_query = r600_create_query; @@ -1017,3 +1181,8 @@ void r600_query_init(struct r600_common_context *rctx) LIST_INITHEAD(&rctx->active_nontimer_queries); LIST_INITHEAD(&rctx->active_timer_queries); } + +void r600_init_screen_query_functions(struct r600_common_screen *rscreen) +{ + rscreen->b.get_driver_query_info = r600_get_driver_query_info; +} diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h new file mode 100644 index 00000000000..0ea5707ca45 --- /dev/null +++ b/src/gallium/drivers/radeon/r600_query.h @@ -0,0 +1,136 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Nicolai Hähnle <[email protected]> + * + */ + +#ifndef R600_QUERY_H +#define R600_QUERY_H + +#include "pipe/p_defines.h" +#include "util/list.h" + +struct r600_common_context; +struct r600_query; +struct r600_query_hw; +struct r600_resource; + +#define R600_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0) +#define R600_QUERY_REQUESTED_VRAM (PIPE_QUERY_DRIVER_SPECIFIC + 1) +#define R600_QUERY_REQUESTED_GTT (PIPE_QUERY_DRIVER_SPECIFIC + 2) +#define R600_QUERY_BUFFER_WAIT_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 3) +#define R600_QUERY_NUM_CS_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 4) +#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5) +#define R600_QUERY_VRAM_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 6) +#define R600_QUERY_GTT_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 7) +#define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8) +#define R600_QUERY_CURRENT_GPU_SCLK (PIPE_QUERY_DRIVER_SPECIFIC + 9) +#define R600_QUERY_CURRENT_GPU_MCLK (PIPE_QUERY_DRIVER_SPECIFIC + 10) +#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11) +#define R600_QUERY_NUM_COMPILATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 12) +#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13) +#define R600_QUERY_FIRST_PERFCOUNTER (PIPE_QUERY_DRIVER_SPECIFIC + 100) + +struct r600_query_ops { + void (*destroy)(struct r600_common_context *, struct r600_query *); + boolean (*begin)(struct r600_common_context *, struct r600_query *); + void (*end)(struct r600_common_context *, struct r600_query *); + boolean (*get_result)(struct r600_common_context *, + struct r600_query *, boolean wait, + union pipe_query_result *result); +}; + +struct r600_query { + struct r600_query_ops *ops; + + /* The type of query */ + unsigned type; +}; + +enum { + R600_QUERY_HW_FLAG_NO_START = (1 << 0), + R600_QUERY_HW_FLAG_TIMER = (1 << 1), + R600_QUERY_HW_FLAG_PREDICATE = (1 << 2), +}; + +struct r600_query_hw_ops { + void (*prepare_buffer)(struct r600_common_context *, + struct 
r600_query_hw *, + struct r600_resource *); + void (*emit_start)(struct r600_common_context *, + struct r600_query_hw *, + struct r600_resource *buffer, uint64_t va); + void (*emit_stop)(struct r600_common_context *, + struct r600_query_hw *, + struct r600_resource *buffer, uint64_t va); + void (*clear_result)(struct r600_query_hw *, union pipe_query_result *); + void (*add_result)(struct r600_common_context *ctx, + struct r600_query_hw *, void *buffer, + union pipe_query_result *result); +}; + +struct r600_query_buffer { + /* The buffer where query results are stored. */ + struct r600_resource *buf; + /* Offset of the next free result after current query data */ + unsigned results_end; + /* If a query buffer is full, a new buffer is created and the old one + * is put in here. When we calculate the result, we sum up the samples + * from all buffers. */ + struct r600_query_buffer *previous; +}; + +struct r600_query_hw { + struct r600_query b; + struct r600_query_hw_ops *ops; + unsigned flags; + + /* The query buffer and how many results are in it. */ + struct r600_query_buffer buffer; + /* Size of the result in memory for both begin_query and end_query, + * this can be one or two numbers, or it could even be a size of a structure. */ + unsigned result_size; + /* The number of dwords for begin_query or end_query. 
*/ + unsigned num_cs_dw_begin; + unsigned num_cs_dw_end; + /* Linked list of queries */ + struct list_head list; + /* For transform feedback: which stream the query is for */ + unsigned stream; +}; + +boolean r600_query_hw_init(struct r600_common_context *rctx, + struct r600_query_hw *query); +void r600_query_hw_destroy(struct r600_common_context *rctx, + struct r600_query *rquery); +boolean r600_query_hw_begin(struct r600_common_context *rctx, + struct r600_query *rquery); +void r600_query_hw_end(struct r600_common_context *rctx, + struct r600_query *rquery); +boolean r600_query_hw_get_result(struct r600_common_context *rctx, + struct r600_query *rquery, + boolean wait, + union pipe_query_result *result); + +#endif /* R600_QUERY_H */ diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index 0dac6fbbdce..8a60441c056 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -49,6 +49,7 @@ #define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8)) #define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8)) #define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8)) +#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8)) /** * flush commands to the hardware @@ -405,7 +406,8 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, enc->use_vm = true; if ((rscreen->info.drm_major > 2) || (rscreen->info.drm_minor >= 42)) enc->use_vui = true; - if (rscreen->info.family >= CHIP_TONGA) + if (rscreen->info.family >= CHIP_TONGA && + rscreen->info.family != CHIP_STONEY) enc->dual_pipe = true; /* TODO enable B frame with dual instance */ if ((rscreen->info.family >= CHIP_TONGA) && @@ -478,6 +480,10 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, radeon_vce_50_init(enc); break; + case FW_52_0_3: + radeon_vce_52_init(enc); + break; + default: goto error; } @@ -500,11 +506,17 @@ error: */ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen) { 
- return rscreen->info.vce_fw_version == FW_40_2_2 || - rscreen->info.vce_fw_version == FW_50_0_1 || - rscreen->info.vce_fw_version == FW_50_1_2 || - rscreen->info.vce_fw_version == FW_50_10_2 || - rscreen->info.vce_fw_version == FW_50_17_3; + switch (rscreen->info.vce_fw_version) { + case FW_40_2_2: + case FW_50_0_1: + case FW_50_1_2: + case FW_50_10_2: + case FW_50_17_3: + case FW_52_0_3: + return true; + default: + return false; + } } /** diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h index 624bda479f8..25e2133521f 100644 --- a/src/gallium/drivers/radeon/radeon_vce.h +++ b/src/gallium/drivers/radeon/radeon_vce.h @@ -140,4 +140,7 @@ void radeon_vce_40_2_2_init(struct rvce_encoder *enc); /* init vce fw 50 specific callbacks */ void radeon_vce_50_init(struct rvce_encoder *enc); +/* init vce fw 52 specific callbacks */ +void radeon_vce_52_init(struct rvce_encoder *enc); + #endif diff --git a/src/gallium/drivers/radeon/radeon_vce_52.c b/src/gallium/drivers/radeon/radeon_vce_52.c new file mode 100644 index 00000000000..fbae1f97f41 --- /dev/null +++ b/src/gallium/drivers/radeon/radeon_vce_52.c @@ -0,0 +1,242 @@ +/************************************************************************** + * + * Copyright 2015 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdio.h> + +#include "pipe/p_video_codec.h" + +#include "util/u_video.h" +#include "util/u_memory.h" + +#include "vl/vl_video_buffer.h" + +#include "r600_pipe_common.h" +#include "radeon_video.h" +#include "radeon_vce.h" + +static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 }; + +static void create(struct rvce_encoder *enc) +{ + enc->task_info(enc, 0x00000000, 0, 0, 0); + + RVCE_BEGIN(0x01000001); // create cmd + RVCE_CS(0x00000000); // encUseCircularBuffer + RVCE_CS(profiles[enc->base.profile - + PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE]); // encProfile + RVCE_CS(enc->base.level); // encLevel + RVCE_CS(0x00000000); // encPicStructRestriction + RVCE_CS(enc->base.width); // encImageWidth + RVCE_CS(enc->base.height); // encImageHeight + RVCE_CS(enc->luma->level[0].pitch_bytes); // encRefPicLumaPitch + RVCE_CS(enc->chroma->level[0].pitch_bytes); // encRefPicChromaPitch + RVCE_CS(align(enc->luma->npix_y, 16) / 8); // encRefYHeightInQw + RVCE_CS(0x00000000); // encRefPic(Addr|Array)Mode, encPicStructRestriction, disableRDO + + RVCE_CS(0x00000000); // encPreEncodeContextBufferOffset + RVCE_CS(0x00000000); // encPreEncodeInputLumaBufferOffset + RVCE_CS(0x00000000); // encPreEncodeInputChromaBufferOffs + RVCE_CS(0x00000000); // encPreEncodeMode|ChromaFlag|VBAQMode|SceneChangeSensitivity + RVCE_END(); +} + +static void encode(struct rvce_encoder *enc) +{ + signed luma_offset, 
chroma_offset, bs_offset; + unsigned dep, bs_idx = enc->bs_idx++; + int i; + + if (enc->dual_inst) { + if (bs_idx == 0) + dep = 1; + else if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) + dep = 0; + else + dep = 2; + } else + dep = 0; + + enc->task_info(enc, 0x00000003, dep, 0, bs_idx); + + RVCE_BEGIN(0x05000001); // context buffer + RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo + RVCE_END(); + + bs_offset = -(signed)(bs_idx * enc->bs_size); + + RVCE_BEGIN(0x05000004); // video bitstream buffer + RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo + RVCE_CS(enc->bs_size); // videoBitstreamRingSize + RVCE_END(); + + if (enc->dual_pipe) { + unsigned aux_offset = enc->cpb.res->buf->size - + RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; + RVCE_BEGIN(0x05000002); // auxiliary buffer + for (i = 0; i < 8; ++i) { + RVCE_CS(aux_offset); + aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE; + } + for (i = 0; i < 8; ++i) + RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE); + RVCE_END(); + } + + RVCE_BEGIN(0x03000001); // encode + RVCE_CS(enc->pic.frame_num ? 
0x0 : 0x11); // insertHeaders + RVCE_CS(0x00000000); // pictureStructure + RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize + RVCE_CS(0x00000000); // forceRefreshMap + RVCE_CS(0x00000000); // insertAUD + RVCE_CS(0x00000000); // endOfSequence + RVCE_CS(0x00000000); // endOfStream + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->luma->level[0].offset); // inputPictureLumaAddressHi/Lo + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->chroma->level[0].offset); // inputPictureChromaAddressHi/Lo + RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch + RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch + RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch + if (enc->dual_pipe) + RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading) + else + RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading) + RVCE_CS(0x00000000); // encInputPicTileConfig + RVCE_CS(enc->pic.picture_type); // encPicType + RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag + RVCE_CS(0x00000000); // encIdrPicId + RVCE_CS(0x00000000); // encMGSKeyPic + RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag + RVCE_CS(0x00000000); // encTemporalLayerIndex + RVCE_CS(0x00000000); // num_ref_idx_active_override_flag + RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1 + RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1 + + i = enc->pic.frame_num - enc->pic.ref_idx_l0; + if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) { + RVCE_CS(0x00000001); // encRefListModificationOp + RVCE_CS(i - 1); // encRefListModificationNum + } else { + RVCE_CS(0x00000000); // encRefListModificationOp + RVCE_CS(0x00000000); // encRefListModificationNum + } + + for (i = 0; i < 3; ++i) { + RVCE_CS(0x00000000); // encRefListModificationOp + RVCE_CS(0x00000000); // encRefListModificationNum + } + for (i = 0; i < 4; ++i) { + RVCE_CS(0x00000000); // 
encDecodedPictureMarkingOp + RVCE_CS(0x00000000); // encDecodedPictureMarkingNum + RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx + RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp + RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum + } + + // encReferencePictureL0[0] + RVCE_CS(0x00000000); // pictureStructure + if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || + enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l0 = l0_slot(enc); + rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset); + RVCE_CS(l0->picture_type); // encPicType + RVCE_CS(l0->frame_num); // frameNumber + RVCE_CS(l0->pic_order_cnt); // pictureOrderCount + RVCE_CS(luma_offset); // lumaOffset + RVCE_CS(chroma_offset); // chromaOffset + } else { + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + } + + // encReferencePictureL0[1] + RVCE_CS(0x00000000); // pictureStructure + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + + // encReferencePictureL1[0] + RVCE_CS(0x00000000); // pictureStructure + if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l1 = l1_slot(enc); + rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset); + RVCE_CS(l1->picture_type); // encPicType + RVCE_CS(l1->frame_num); // frameNumber + RVCE_CS(l1->pic_order_cnt); // pictureOrderCount + RVCE_CS(luma_offset); // lumaOffset + RVCE_CS(chroma_offset); // chromaOffset + } else { + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + } + + rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset); + 
RVCE_CS(luma_offset); // encReconstructedLumaOffset + RVCE_CS(chroma_offset); // encReconstructedChromaOffset + RVCE_CS(0x00000000); // encColocBufferOffset + RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset + RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset + RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset + RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset + RVCE_CS(0x00000000); // pictureCount + RVCE_CS(enc->pic.frame_num); // frameNumber + RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount + RVCE_CS(0x00000000); // numIPicRemainInRCGOP + RVCE_CS(0x00000000); // numPPicRemainInRCGOP + RVCE_CS(0x00000000); // numBPicRemainInRCGOP + RVCE_CS(0x00000000); // numIRPicRemainInRCGOP + RVCE_CS(0x00000000); // enableIntraRefresh + + RVCE_CS(0x00000000); // aq_variance_en + RVCE_CS(0x00000000); // aq_block_size + RVCE_CS(0x00000000); // aq_mb_variance_sel + RVCE_CS(0x00000000); // aq_frame_variance_sel + RVCE_CS(0x00000000); // aq_param_a + RVCE_CS(0x00000000); // aq_param_b + RVCE_CS(0x00000000); // aq_param_c + RVCE_CS(0x00000000); // aq_param_d + RVCE_CS(0x00000000); // aq_param_e + + RVCE_CS(0x00000000); // contextInSFB + RVCE_END(); +} + +void radeon_vce_52_init(struct rvce_encoder *enc) +{ + radeon_vce_50_init(enc); + + enc->create = create; + enc->encode = encode; +} diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 93847d5ec2f..209b940aa11 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3256,25 +3256,34 @@ si_write_harvested_raster_configs(struct si_context *sctx, } } - /* GRBM_GFX_INDEX is privileged on VI */ - if (sctx->b.chip_class <= CIK) + /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ + if (sctx->b.chip_class < CIK) si_pm4_set_reg(pm4, GRBM_GFX_INDEX, SE_INDEX(se) | SH_BROADCAST_WRITES | INSTANCE_BROADCAST_WRITES); + else + si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, + 
S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) | + S_030800_INSTANCE_BROADCAST_WRITES(1)); si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se); if (sctx->b.chip_class >= CIK) si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } - /* GRBM_GFX_INDEX is privileged on VI */ - if (sctx->b.chip_class <= CIK) + /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ + if (sctx->b.chip_class < CIK) si_pm4_set_reg(pm4, GRBM_GFX_INDEX, SE_BROADCAST_WRITES | SH_BROADCAST_WRITES | INSTANCE_BROADCAST_WRITES); + else + si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, + S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | + S_030800_INSTANCE_BROADCAST_WRITES(1)); } static void si_init_config(struct si_context *sctx) { + struct si_screen *sscreen = sctx->screen; unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16); unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask; unsigned raster_config, raster_config_1; @@ -3345,9 +3354,14 @@ static void si_init_config(struct si_context *sctx) raster_config_1 = 0x0000002e; break; case CHIP_FIJI: - /* Fiji should be same as Hawaii, but that causes corruption in some cases */ - raster_config = 0x16000012; /* 0x3a00161a */ - raster_config_1 = 0x0000002a; /* 0x0000002e */ + if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) { + /* old kernels with old tiling config */ + raster_config = 0x16000012; + raster_config_1 = 0x0000002a; + } else { + raster_config = 0x3a00161a; + raster_config_1 = 0x0000002e; + } break; case CHIP_TONGA: raster_config = 0x16000012; diff --git a/src/gallium/drivers/softpipe/Automake.inc b/src/gallium/drivers/softpipe/Automake.inc index 6455f3caa3d..bd3c2eead16 100644 --- a/src/gallium/drivers/softpipe/Automake.inc +++ b/src/gallium/drivers/softpipe/Automake.inc @@ -3,13 +3,10 @@ if HAVE_GALLIUM_SOFTPIPE TARGET_DRIVERS += swrast TARGET_CPPFLAGS += -DGALLIUM_SOFTPIPE TARGET_LIB_DEPS += \ - 
$(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la \ $(top_builddir)/src/gallium/drivers/softpipe/libsoftpipe.la -if HAVE_DRI2 +if HAVE_DRISW_KMS TARGET_DRIVERS += kms_swrast -TARGET_LIB_DEPS += \ - $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la endif endif diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index bcce18a3502..6a4f9d8d076 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -51,14 +51,16 @@ #define SVGA_QUERY_NUM_VALIDATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 3) #define SVGA_QUERY_MAP_BUFFER_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 4) #define SVGA_QUERY_NUM_RESOURCES_MAPPED (PIPE_QUERY_DRIVER_SPECIFIC + 5) +#define SVGA_QUERY_NUM_BYTES_UPLOADED (PIPE_QUERY_DRIVER_SPECIFIC + 6) + /* running total counters */ -#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 6) -#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 7) -#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 8) -#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 9) -#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 10) +#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 7) +#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 8) +#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 9) +#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 10) +#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 11) /*SVGA_QUERY_MAX has to be last because it is size of an array*/ -#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 11) +#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 12) /** * Maximum supported number of constant buffers per shader @@ -485,6 +487,7 @@ struct svga_context uint64_t num_shaders; /**< SVGA_QUERY_NUM_SHADERS */ uint64_t num_state_objects; /**< SVGA_QUERY_NUM_STATE_OBJECTS */ uint64_t num_surface_views; /**< SVGA_QUERY_NUM_SURFACE_VIEWS */ + uint64_t 
num_bytes_uploaded; /**< SVGA_QUERY_NUM_BYTES_UPLOADED */ } hud; /** The currently bound stream output targets */ diff --git a/src/gallium/drivers/svga/svga_format.c b/src/gallium/drivers/svga/svga_format.c index 28b8064bf70..2b549dfa5bb 100644 --- a/src/gallium/drivers/svga/svga_format.c +++ b/src/gallium/drivers/svga/svga_format.c @@ -53,17 +53,17 @@ static const struct vgpu10_format_entry format_conversion_table[] = { PIPE_FORMAT_A8R8G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_X8R8G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_B5G5R5A1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B5G5R5A1_UNORM, 0 }, - { PIPE_FORMAT_B4G4R4A4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_A4R4G4B4, 0 }, + { PIPE_FORMAT_B4G4R4A4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_B5G6R5_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B5G6R5_UNORM, 0 }, { PIPE_FORMAT_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, 0 }, - { PIPE_FORMAT_L8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_LUMINANCE8, 0 }, + { PIPE_FORMAT_L8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_A8_UNORM, 0 }, { PIPE_FORMAT_I8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_L8A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_L16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_UYVY, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_YUYV, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, - { PIPE_FORMAT_Z16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_Z_D16, 0 }, + { PIPE_FORMAT_Z16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_D16_UNORM, 0 }, { PIPE_FORMAT_Z32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_Z32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT, 0 }, { PIPE_FORMAT_Z24_UNORM_S8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, 0 }, @@ -152,14 +152,14 @@ static const struct 
vgpu10_format_entry format_conversion_table[] = { PIPE_FORMAT_A8R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_X8R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_R8G8B8A8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_R8G8B8A8_UNORM_SRGB, 0 }, - { PIPE_FORMAT_DXT1_RGB, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 }, - { PIPE_FORMAT_DXT1_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 }, - { PIPE_FORMAT_DXT3_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT3, 0 }, - { PIPE_FORMAT_DXT5_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT5, 0 }, - { PIPE_FORMAT_DXT1_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 }, - { PIPE_FORMAT_DXT1_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 }, - { PIPE_FORMAT_DXT3_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT3, 0 }, - { PIPE_FORMAT_DXT5_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT5, 0 }, + { PIPE_FORMAT_DXT1_RGB, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM, 0 }, + { PIPE_FORMAT_DXT1_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM, 0 }, + { PIPE_FORMAT_DXT3_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC2_UNORM, 0 }, + { PIPE_FORMAT_DXT5_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC3_UNORM, 0 }, + { PIPE_FORMAT_DXT1_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM_SRGB, 0 }, + { PIPE_FORMAT_DXT1_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM_SRGB, 0 }, + { PIPE_FORMAT_DXT3_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC2_UNORM_SRGB, 0 }, + { PIPE_FORMAT_DXT5_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC3_UNORM_SRGB, 0 }, { PIPE_FORMAT_RGTC1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC4_UNORM, 0 }, { PIPE_FORMAT_RGTC1_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC4_SNORM, 0 }, { PIPE_FORMAT_RGTC2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC5_UNORM, 0 }, @@ -326,6 +326,34 @@ static const struct vgpu10_format_entry format_conversion_table[] = { PIPE_FORMAT_ETC2_R11_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_ETC2_RG11_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_ETC2_RG11_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + 
{ PIPE_FORMAT_ASTC_4x4, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x4, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x10, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x10, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x12, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_4x4_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x4_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { 
PIPE_FORMAT_ASTC_10x10_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x10_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x12_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, }; @@ -472,7 +500,7 @@ struct format_cap { * PIPE_FORMAT_Z24_UNORM_S8_UINT is converted to SVGA3D_D24_UNORM_S8_UINT * for rendering but converted to SVGA3D_R24_UNORM_X8_TYPELESS for sampling. * If we want to query if a format supports both rendering and sampling the - * host will tell us no for both SVGA3D_D24_UNORM_S8_UINT and + * host will tell us no for SVGA3D_D24_UNORM_S8_UINT, SVGA3D_D16_UNORM and * SVGA3D_R24_UNORM_X8_TYPELESS. So we override the host query for those * formats and report that both can do rendering and sampling. */ @@ -1410,27 +1438,50 @@ static const struct format_cap format_cap_table[] = { }, { "SVGA3D_BC1_TYPELESS", - SVGA3D_BC1_TYPELESS, 0, 0, 0, 0, 0 + SVGA3D_BC1_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC1_TYPELESS, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC1_UNORM_SRGB", - SVGA3D_BC1_UNORM_SRGB, 0, 0, 0, 0, 0 + SVGA3D_BC1_UNORM_SRGB, + SVGA3D_DEVCAP_DXFMT_BC1_UNORM_SRGB, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC2_TYPELESS", - SVGA3D_BC2_TYPELESS, 0, 0, 0, 0, 0 + SVGA3D_BC2_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC2_TYPELESS, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC2_UNORM_SRGB", - SVGA3D_BC2_UNORM_SRGB, 0, 0, 0, 0, 0 + SVGA3D_BC2_UNORM_SRGB, + SVGA3D_DEVCAP_DXFMT_BC2_UNORM_SRGB, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC3_TYPELESS", - SVGA3D_BC3_TYPELESS, 0, 0, 0, 0, 0 + SVGA3D_BC3_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC3_TYPELESS, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC3_UNORM_SRGB", - SVGA3D_BC3_UNORM_SRGB, 0, 0, 0, 0, 0 + SVGA3D_BC3_UNORM_SRGB, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + 
SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC4_TYPELESS", @@ -1671,7 +1722,7 @@ static const struct format_cap format_cap_table[] = { { "SVGA3D_D16_UNORM", SVGA3D_D16_UNORM, - SVGA3D_DEVCAP_DXFMT_D16_UNORM, + 0, /*SVGA3D_DEVCAP_DXFMT_D16_UNORM*/ 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | @@ -1690,15 +1741,27 @@ static const struct format_cap format_cap_table[] = { }, { "SVGA3D_BC1_UNORM", - SVGA3D_BC1_UNORM, 0, 0, 0, 0, 0 + SVGA3D_BC1_UNORM, + SVGA3D_DEVCAP_DXFMT_BC1_UNORM, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC2_UNORM", - SVGA3D_BC2_UNORM, 0, 0, 0, 0, 0 + SVGA3D_BC2_UNORM, + SVGA3D_DEVCAP_DXFMT_BC2_UNORM, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC3_UNORM", - SVGA3D_BC3_UNORM, 0, 0, 0, 0, 0 + SVGA3D_BC3_UNORM, + SVGA3D_DEVCAP_DXFMT_BC3_UNORM, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_B5G6R5_UNORM", @@ -2053,6 +2116,7 @@ svga_typeless_format(SVGA3dSurfaceFormat format) case SVGA3D_R8G8_UINT: case SVGA3D_R8G8_SINT: return SVGA3D_R8G8_TYPELESS; + case SVGA3D_D16_UNORM: case SVGA3D_R16_UNORM: case SVGA3D_R16_UINT: case SVGA3D_R16_SNORM: @@ -2070,6 +2134,15 @@ svga_typeless_format(SVGA3dSurfaceFormat format) case SVGA3D_B8G8R8X8_UNORM_SRGB: case SVGA3D_B8G8R8X8_UNORM: return SVGA3D_B8G8R8X8_TYPELESS; + case SVGA3D_BC1_UNORM: + case SVGA3D_BC1_UNORM_SRGB: + return SVGA3D_BC1_TYPELESS; + case SVGA3D_BC2_UNORM: + case SVGA3D_BC2_UNORM_SRGB: + return SVGA3D_BC2_TYPELESS; + case SVGA3D_BC3_UNORM: + case SVGA3D_BC3_UNORM_SRGB: + return SVGA3D_BC3_TYPELESS; case SVGA3D_BC4_UNORM: case SVGA3D_BC4_SNORM: return SVGA3D_BC4_TYPELESS; @@ -2079,18 +2152,10 @@ svga_typeless_format(SVGA3dSurfaceFormat format) /* Special cases (no corresponding _TYPELESS formats) */ case SVGA3D_A8_UNORM: - case SVGA3D_A4R4G4B4: case SVGA3D_B5G5R5A1_UNORM: case SVGA3D_B5G6R5_UNORM: - case SVGA3D_DXT1: - case SVGA3D_DXT2: - case SVGA3D_DXT3: - case 
SVGA3D_DXT4: - case SVGA3D_DXT5: case SVGA3D_R11G11B10_FLOAT: case SVGA3D_R9G9B9E5_SHAREDEXP: - case SVGA3D_Z_D32: - case SVGA3D_Z_D16: return format; default: debug_printf("Unexpected format %s in %s\n", @@ -2098,3 +2163,26 @@ svga_typeless_format(SVGA3dSurfaceFormat format) return format; } } + + +/** + * Given a surface format, return the corresponding format to use for + * a texture sampler. In most cases, it's the format unchanged, but there + * are some special cases. + */ +SVGA3dSurfaceFormat +svga_sampler_format(SVGA3dSurfaceFormat format) +{ + switch (format) { + case SVGA3D_D16_UNORM: + return SVGA3D_R16_UNORM; + case SVGA3D_D24_UNORM_S8_UINT: + return SVGA3D_R24_UNORM_X8_TYPELESS; + case SVGA3D_D32_FLOAT: + return SVGA3D_R32_FLOAT; + case SVGA3D_D32_FLOAT_S8X24_UINT: + return SVGA3D_R32_FLOAT_X8X24_TYPELESS; + default: + return format; + } +} diff --git a/src/gallium/drivers/svga/svga_format.h b/src/gallium/drivers/svga/svga_format.h index 0af218cb01a..9f9a530d473 100644 --- a/src/gallium/drivers/svga/svga_format.h +++ b/src/gallium/drivers/svga/svga_format.h @@ -93,4 +93,8 @@ SVGA3dSurfaceFormat svga_typeless_format(SVGA3dSurfaceFormat format); +SVGA3dSurfaceFormat +svga_sampler_format(SVGA3dSurfaceFormat format); + + #endif /* SVGA_FORMAT_H_ */ diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c index 8b9818334ca..5416a009dcb 100644 --- a/src/gallium/drivers/svga/svga_pipe_query.c +++ b/src/gallium/drivers/svga/svga_pipe_query.c @@ -731,6 +731,7 @@ svga_create_query(struct pipe_context *pipe, case SVGA_QUERY_MAP_BUFFER_TIME: case SVGA_QUERY_NUM_SURFACE_VIEWS: case SVGA_QUERY_NUM_RESOURCES_MAPPED: + case SVGA_QUERY_NUM_BYTES_UPLOADED: break; default: assert(!"unexpected query type in svga_create_query()"); @@ -797,6 +798,7 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_MAP_BUFFER_TIME: case SVGA_QUERY_NUM_SURFACE_VIEWS: case SVGA_QUERY_NUM_RESOURCES_MAPPED: + case 
SVGA_QUERY_NUM_BYTES_UPLOADED: /* nothing */ break; default: @@ -876,6 +878,9 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_RESOURCES_MAPPED: sq->begin_count = svga->hud.num_resources_mapped; break; + case SVGA_QUERY_NUM_BYTES_UPLOADED: + sq->begin_count = svga->hud.num_bytes_uploaded; + break; case SVGA_QUERY_MEMORY_USED: case SVGA_QUERY_NUM_SHADERS: case SVGA_QUERY_NUM_RESOURCES: @@ -966,6 +971,9 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_RESOURCES_MAPPED: sq->end_count = svga->hud.num_resources_mapped; break; + case SVGA_QUERY_NUM_BYTES_UPLOADED: + sq->end_count = svga->hud.num_bytes_uploaded; + break; case SVGA_QUERY_MEMORY_USED: case SVGA_QUERY_NUM_SHADERS: case SVGA_QUERY_NUM_RESOURCES: @@ -1061,6 +1069,7 @@ svga_get_query_result(struct pipe_context *pipe, case SVGA_QUERY_NUM_FLUSHES: case SVGA_QUERY_NUM_VALIDATIONS: case SVGA_QUERY_NUM_RESOURCES_MAPPED: + case SVGA_QUERY_NUM_BYTES_UPLOADED: case SVGA_QUERY_MAP_BUFFER_TIME: vresult->u64 = sq->end_count - sq->begin_count; break; diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c index 71f2f4f2779..449cc149a81 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/src/gallium/drivers/svga/svga_resource_buffer.c @@ -80,6 +80,11 @@ svga_buffer_transfer_map(struct pipe_context *pipe, uint8_t *map; int64_t begin = os_time_get(); + assert(box->y == 0); + assert(box->z == 0); + assert(box->height == 1); + assert(box->depth == 1); + transfer = CALLOC_STRUCT(pipe_transfer); if (transfer == NULL) { return NULL; diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 69e5f75e208..8c5cff5abc1 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -429,6 +429,8 @@ svga_buffer_upload_flush(struct svga_context *svga, 
assert(box->x <= sbuf->b.b.width0); assert(box->x + box->w <= sbuf->b.b.width0); + + svga->hud.num_bytes_uploaded += box->w; } } else { @@ -454,6 +456,8 @@ svga_buffer_upload_flush(struct svga_context *svga, assert(box->x <= sbuf->b.b.width0); assert(box->x + box->w <= sbuf->b.b.width0); + + svga->hud.num_bytes_uploaded += box->w; } } diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index a02d1e495ff..81594777258 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -380,6 +380,12 @@ svga_texture_transfer_map(struct pipe_context *pipe, break; } + if (usage & PIPE_TRANSFER_WRITE) { + /* record texture upload for HUD */ + svga->hud.num_bytes_uploaded += + nblocksx * nblocksy * d * util_format_get_blocksize(texture->format); + } + if (!use_direct_map) { /* Use a DMA buffer */ st->hw_nblocksy = nblocksy; diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index a80bc9b9119..09a3d33552b 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -789,6 +789,8 @@ svga_get_driver_query_info(struct pipe_screen *screen, {"map-buffer-time", SVGA_QUERY_MAP_BUFFER_TIME, {0}, PIPE_DRIVER_QUERY_TYPE_MICROSECONDS}, {"num-resources-mapped", SVGA_QUERY_NUM_RESOURCES_MAPPED, {0}}, + {"num-bytes-uploaded", SVGA_QUERY_NUM_BYTES_UPLOADED, {0}, + PIPE_DRIVER_QUERY_TYPE_BYTES, PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE}, /* running total counters */ {"memory-used", SVGA_QUERY_MEMORY_USED, {0}, diff --git a/src/gallium/drivers/svga/svga_state_sampler.c b/src/gallium/drivers/svga/svga_state_sampler.c index 611d2c6102f..c5d52bbfd14 100644 --- a/src/gallium/drivers/svga/svga_state_sampler.c +++ b/src/gallium/drivers/svga/svga_state_sampler.c @@ -108,6 +108,9 @@ svga_validate_pipe_sampler_view(struct svga_context *svga, PIPE_BIND_SAMPLER_VIEW); assert(format != SVGA3D_FORMAT_INVALID); + /* 
Convert the format to a sampler-friendly format, if needed */ + format = svga_sampler_format(format); + if (texture->target == PIPE_BUFFER) { viewDesc.buffer.firstElement = sv->base.u.buf.first_element; viewDesc.buffer.numElements = (sv->base.u.buf.last_element - diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 8b02680c77e..62a51e9a94d 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -456,9 +456,6 @@ trace_screen_create(struct pipe_screen *screen) { struct trace_screen *tr_scr; - if(!screen) - goto error1; - if (!trace_enabled()) goto error1; diff --git a/src/gallium/drivers/vc4/Automake.inc b/src/gallium/drivers/vc4/Automake.inc index 6fa3e190cac..5664c2ab14e 100644 --- a/src/gallium/drivers/vc4/Automake.inc +++ b/src/gallium/drivers/vc4/Automake.inc @@ -6,8 +6,4 @@ TARGET_LIB_DEPS += \ $(top_builddir)/src/gallium/winsys/vc4/drm/libvc4drm.la \ $(top_builddir)/src/gallium/drivers/vc4/libvc4.la -if USE_VC4_SIMULATOR -TARGET_CPPFLAGS += -DUSE_VC4_SIMULATOR -endif - endif diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am index f4a57ba3404..a3bf72fc72a 100644 --- a/src/gallium/drivers/vc4/Makefile.am +++ b/src/gallium/drivers/vc4/Makefile.am @@ -23,7 +23,6 @@ include Makefile.sources include $(top_srcdir)/src/gallium/Automake.inc if USE_VC4_SIMULATOR -SIM_CFLAGS = -DUSE_VC4_SIMULATOR=1 SIM_LDFLAGS = -lsimpenrose endif diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c index 373c9e12d11..0672a92226f 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c @@ -509,8 +509,8 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b, nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color); nir_ssa_def *src_color[4], *unpacked_dst_color[4]; for (unsigned i = 0; i < 4; i++) { - src_color[i] = nir_swizzle(b, 
intr->src[0].ssa, &i, 1, false); - unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false); + src_color[i] = nir_channel(b, intr->src[0].ssa, i); + unpacked_dst_color[i] = nir_channel(b, dst_vec4, i); } vc4_nir_emit_alpha_test_discard(c, b, src_color[3]); diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index 7ea263afb68..1afe52a63f4 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -84,7 +84,7 @@ vc4_nir_unpack_16u(nir_builder *b, nir_ssa_def *src, unsigned chan) static nir_ssa_def * vc4_nir_unpack_8f(nir_builder *b, nir_ssa_def *src, unsigned chan) { - return nir_swizzle(b, nir_unpack_unorm_4x8(b, src), &chan, 1, false); + return nir_channel(b, nir_unpack_unorm_4x8(b, src), chan); } static nir_ssa_def * @@ -326,9 +326,8 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, intr_comp->const_index[0] = intr->const_index[0] * 4 + i; assert(intr->src[0].is_ssa); - intr_comp->src[0] = nir_src_for_ssa(nir_swizzle(b, - intr->src[0].ssa, - &i, 1, false)); + intr_comp->src[0] = + nir_src_for_ssa(nir_channel(b, intr->src[0].ssa, i)); nir_builder_instr_insert(b, &intr_comp->instr); } diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c index f1bab810eff..07a92266dd2 100644 --- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c +++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c @@ -144,6 +144,8 @@ qir_opt_algebraic(struct vc4_compile *c) case QOP_SEL_X_Y_ZC: case QOP_SEL_X_Y_NS: case QOP_SEL_X_Y_NC: + case QOP_SEL_X_Y_CS: + case QOP_SEL_X_Y_CC: if (is_zero(c, inst->src[1])) { /* Replace references to a 0 uniform value * with the SEL_X_0 equivalent. 
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index a48dad804e2..197577b6c20 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -987,6 +987,10 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) qir_SF(c, qir_SUB(c, src[0], src[1])); *dest = qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0)); break; + case nir_op_uge: + qir_SF(c, qir_SUB(c, src[0], src[1])); + *dest = qir_SEL_X_0_CC(c, qir_uniform_ui(c, ~0)); + break; case nir_op_ilt: qir_SF(c, qir_SUB(c, src[0], src[1])); *dest = qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0)); @@ -1167,7 +1171,7 @@ emit_point_size_write(struct vc4_compile *c) struct qreg point_size; if (c->output_point_size_index != -1) - point_size = c->outputs[c->output_point_size_index + 3]; + point_size = c->outputs[c->output_point_size_index]; else point_size = qir_uniform_f(c, 1.0); diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 7894b081b19..f2855e159fc 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -69,10 +69,14 @@ static const struct qir_op_info qir_op_info[] = { [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true }, [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true }, [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1, false, true }, + [QOP_SEL_X_0_CS] = { "fsel_x_0_cs", 1, 1, false, true }, + [QOP_SEL_X_0_CC] = { "fsel_x_0_cc", 1, 1, false, true }, [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2, false, true }, [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2, false, true }, [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2, false, true }, [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2, false, true }, + [QOP_SEL_X_Y_CS] = { "fsel_x_y_cs", 1, 2, false, true }, + [QOP_SEL_X_Y_CC] = { "fsel_x_y_cc", 1, 2, false, true }, [QOP_RCP] = { "rcp", 1, 1, false, true }, [QOP_RSQ] = { "rsq", 1, 1, false, true }, @@ -218,10 +222,14 @@ qir_depends_on_flags(struct qinst *inst) case QOP_SEL_X_0_NC: case QOP_SEL_X_0_ZS: 
case QOP_SEL_X_0_ZC: + case QOP_SEL_X_0_CS: + case QOP_SEL_X_0_CC: case QOP_SEL_X_Y_NS: case QOP_SEL_X_Y_NC: case QOP_SEL_X_Y_ZS: case QOP_SEL_X_Y_ZC: + case QOP_SEL_X_Y_CS: + case QOP_SEL_X_Y_CC: return true; default: return false; diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index a92ad93ee07..ddb35e41fcf 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -99,11 +99,15 @@ enum qop { QOP_SEL_X_0_ZC, QOP_SEL_X_0_NS, QOP_SEL_X_0_NC, + QOP_SEL_X_0_CS, + QOP_SEL_X_0_CC, /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */ QOP_SEL_X_Y_ZS, QOP_SEL_X_Y_ZC, QOP_SEL_X_Y_NS, QOP_SEL_X_Y_NC, + QOP_SEL_X_Y_CS, + QOP_SEL_X_Y_CC, QOP_FTOI, QOP_ITOF, @@ -567,10 +571,14 @@ QIR_ALU1(SEL_X_0_ZS) QIR_ALU1(SEL_X_0_ZC) QIR_ALU1(SEL_X_0_NS) QIR_ALU1(SEL_X_0_NC) +QIR_ALU1(SEL_X_0_CS) +QIR_ALU1(SEL_X_0_CC) QIR_ALU2(SEL_X_Y_ZS) QIR_ALU2(SEL_X_Y_ZC) QIR_ALU2(SEL_X_Y_NS) QIR_ALU2(SEL_X_Y_NC) +QIR_ALU2(SEL_X_Y_CS) +QIR_ALU2(SEL_X_Y_CC) QIR_ALU2(FMIN) QIR_ALU2(FMAX) QIR_ALU2(FMINABS) diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c index f087c3b81b5..a57e100593c 100644 --- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c @@ -22,14 +22,10 @@ */ /** - * @file vc4_opt_algebraic.c + * @file vc4_qir_lower_uniforms.c * - * This is the optimization pass for miscellaneous changes to instructions - * where we can simplify the operation by some knowledge about the specific - * operations. - * - * Mostly this will be a matter of turning things into MOVs so that they can - * later be copy-propagated out. + * This is the pre-code-generation pass for fixing up instructions that try to + * read from multiple uniform values. 
*/ #include "vc4_qir.h" @@ -85,6 +81,33 @@ is_lowerable_uniform(struct qinst *inst, int i) return true; } +/* Returns the number of different uniform values referenced by the + * instruction. + */ +static uint32_t +qir_get_instruction_uniform_count(struct qinst *inst) +{ + uint32_t count = 0; + + for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + if (inst->src[i].file != QFILE_UNIF) + continue; + + bool is_duplicate = false; + for (int j = 0; j < i; j++) { + if (inst->src[j].file == QFILE_UNIF && + inst->src[j].index == inst->src[i].index) { + is_duplicate = true; + break; + } + } + if (!is_duplicate) + count++; + } + + return count; +} + void qir_lower_uniforms(struct vc4_compile *c) { @@ -98,13 +121,7 @@ qir_lower_uniforms(struct vc4_compile *c) list_for_each_entry(struct qinst, inst, &c->instructions, link) { uint32_t nsrc = qir_get_op_nsrc(inst->op); - uint32_t count = 0; - for (int i = 0; i < nsrc; i++) { - if (inst->src[i].file == QFILE_UNIF) - count++; - } - - if (count <= 1) + if (qir_get_instruction_uniform_count(inst) <= 1) continue; for (int i = 0; i < nsrc; i++) { @@ -140,23 +157,22 @@ qir_lower_uniforms(struct vc4_compile *c) list_for_each_entry(struct qinst, inst, &c->instructions, link) { uint32_t nsrc = qir_get_op_nsrc(inst->op); - uint32_t count = 0; - for (int i = 0; i < nsrc; i++) { - if (inst->src[i].file == QFILE_UNIF) - count++; - } + uint32_t count = qir_get_instruction_uniform_count(inst); if (count <= 1) continue; + bool removed = false; for (int i = 0; i < nsrc; i++) { if (is_lowerable_uniform(inst, i) && inst->src[i].index == max_index) { inst->src[i] = temp; remove_uniform(ht, unif); - count--; + removed = true; } } + if (removed) + count--; /* If the instruction doesn't need lowering any more, * then drop it from the list. 
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 133e1385178..e0d3633da42 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -311,6 +311,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) case QOP_SEL_X_0_ZC: case QOP_SEL_X_0_NS: case QOP_SEL_X_0_NC: + case QOP_SEL_X_0_CS: + case QOP_SEL_X_0_CC: queue(c, qpu_a_MOV(dst, src[0]) | unpack); set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS + QPU_COND_ZS); @@ -324,6 +326,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) case QOP_SEL_X_Y_ZC: case QOP_SEL_X_Y_NS: case QOP_SEL_X_Y_NC: + case QOP_SEL_X_Y_CS: + case QOP_SEL_X_Y_CC: queue(c, qpu_a_MOV(dst, src[0])); if (qinst->src[0].pack) *(last_inst(c)) |= unpack; diff --git a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c index 7f11fba2340..85a0c95e851 100644 --- a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c @@ -44,18 +44,28 @@ qir_reorder_uniforms(struct vc4_compile *c) uint32_t next_uniform = 0; list_for_each_entry(struct qinst, inst, &c->instructions, link) { + uint32_t new = ~0; + for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { if (inst->src[i].file != QFILE_UNIF) continue; - uint32_t new = next_uniform++; - if (uniform_index_size <= new) { - uniform_index_size = - MAX2(uniform_index_size * 2, 16); - uniform_index = - realloc(uniform_index, - uniform_index_size * - sizeof(uint32_t)); + if (new == ~0) { + new = next_uniform++; + if (uniform_index_size <= new) { + uniform_index_size = + MAX2(uniform_index_size * 2, 16); + uniform_index = + realloc(uniform_index, + uniform_index_size * + sizeof(uint32_t)); + } + } else { + /* If we've got two uniform references in this + * instruction, they need to be the same + * uniform value. 
+ */ + assert(inst->src[i].index == uniform_index[new]); } uniform_index[new] = inst->src[i].index; diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 27f358f8fb9..be7447de67d 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -116,6 +116,25 @@ struct pipe_context { unsigned query_type, unsigned index ); + /** + * Create a query object that queries all given query types simultaneously. + * + * This can only be used for those query types for which + * get_driver_query_info indicates that it must be used. Only one batch + * query object may be active at a time. + * + * There may be additional constraints on which query types can be used + * together, in particular those that are implied by + * get_driver_query_group_info. + * + * \param num_queries the number of query types + * \param query_types array of \p num_queries query types + * \return a query object, or NULL on error. + */ + struct pipe_query *(*create_batch_query)( struct pipe_context *pipe, + unsigned num_queries, + unsigned *query_types ); + void (*destroy_query)(struct pipe_context *pipe, struct pipe_query *q); diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 7240154727e..b3c8b9f7360 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -776,6 +776,16 @@ struct pipe_query_data_pipeline_statistics }; /** + * For batch queries. + */ +union pipe_numeric_type_union +{ + uint64_t u64; + uint32_t u32; + float f; +}; + +/** * Query result (returned by pipe_context::get_query_result). 
*/ union pipe_query_result @@ -791,6 +801,8 @@ union pipe_query_result /* PIPE_QUERY_PRIMITIVES_GENERATED */ /* PIPE_QUERY_PRIMITIVES_EMITTED */ /* PIPE_DRIVER_QUERY_TYPE_UINT64 */ + /* PIPE_DRIVER_QUERY_TYPE_BYTES */ + /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */ /* PIPE_DRIVER_QUERY_TYPE_HZ */ uint64_t u64; @@ -809,6 +821,9 @@ union pipe_query_result /* PIPE_QUERY_PIPELINE_STATISTICS */ struct pipe_query_data_pipeline_statistics pipeline_statistics; + + /* batch queries */ + union pipe_numeric_type_union batch[0]; }; union pipe_color_union @@ -829,12 +844,6 @@ enum pipe_driver_query_type PIPE_DRIVER_QUERY_TYPE_HZ = 6, }; -enum pipe_driver_query_group_type -{ - PIPE_DRIVER_QUERY_GROUP_TYPE_CPU = 0, - PIPE_DRIVER_QUERY_GROUP_TYPE_GPU = 1, -}; - /* Whether an average value per frame or a cumulative value should be * displayed. */ @@ -844,12 +853,13 @@ enum pipe_driver_query_result_type PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE = 1, }; -union pipe_numeric_type_union -{ - uint64_t u64; - uint32_t u32; - float f; -}; +/** + * Some hardware requires some hardware-specific queries to be submitted + * as batched queries. The corresponding query objects are created using + * create_batch_query, and at most one such query may be active at + * any time. 
+ */ +#define PIPE_DRIVER_QUERY_FLAG_BATCH (1 << 0) struct pipe_driver_query_info { @@ -859,12 +869,12 @@ struct pipe_driver_query_info enum pipe_driver_query_type type; enum pipe_driver_query_result_type result_type; unsigned group_id; + unsigned flags; }; struct pipe_driver_query_group_info { const char *name; - enum pipe_driver_query_group_type type; unsigned max_active_queries; unsigned num_queries; }; diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 5f0690e5ae6..d9c9f9b5cc2 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -359,6 +359,36 @@ enum pipe_format { PIPE_FORMAT_ETC2_RG11_UNORM = 277, PIPE_FORMAT_ETC2_RG11_SNORM = 278, + PIPE_FORMAT_ASTC_4x4 = 279, + PIPE_FORMAT_ASTC_5x4 = 280, + PIPE_FORMAT_ASTC_5x5 = 281, + PIPE_FORMAT_ASTC_6x5 = 282, + PIPE_FORMAT_ASTC_6x6 = 283, + PIPE_FORMAT_ASTC_8x5 = 284, + PIPE_FORMAT_ASTC_8x6 = 285, + PIPE_FORMAT_ASTC_8x8 = 286, + PIPE_FORMAT_ASTC_10x5 = 287, + PIPE_FORMAT_ASTC_10x6 = 288, + PIPE_FORMAT_ASTC_10x8 = 289, + PIPE_FORMAT_ASTC_10x10 = 290, + PIPE_FORMAT_ASTC_12x10 = 291, + PIPE_FORMAT_ASTC_12x12 = 292, + + PIPE_FORMAT_ASTC_4x4_SRGB = 293, + PIPE_FORMAT_ASTC_5x4_SRGB = 294, + PIPE_FORMAT_ASTC_5x5_SRGB = 295, + PIPE_FORMAT_ASTC_6x5_SRGB = 296, + PIPE_FORMAT_ASTC_6x6_SRGB = 297, + PIPE_FORMAT_ASTC_8x5_SRGB = 298, + PIPE_FORMAT_ASTC_8x6_SRGB = 299, + PIPE_FORMAT_ASTC_8x8_SRGB = 300, + PIPE_FORMAT_ASTC_10x5_SRGB = 301, + PIPE_FORMAT_ASTC_10x6_SRGB = 302, + PIPE_FORMAT_ASTC_10x8_SRGB = 303, + PIPE_FORMAT_ASTC_10x10_SRGB = 304, + PIPE_FORMAT_ASTC_12x10_SRGB = 305, + PIPE_FORMAT_ASTC_12x12_SRGB = 306, + PIPE_FORMAT_COUNT }; diff --git a/src/gallium/include/state_tracker/drm_driver.h b/src/gallium/include/state_tracker/drm_driver.h index 740c4bbe1a6..959a7625e30 100644 --- a/src/gallium/include/state_tracker/drm_driver.h +++ b/src/gallium/include/state_tracker/drm_driver.h @@ -117,10 +117,4 @@ struct drm_driver_descriptor 
driver_descriptor = { \ .configuration = (conf), \ }; -extern struct pipe_screen *dd_create_screen(int fd); - -extern const char *dd_driver_name(void); - -extern const struct drm_conf_ret *dd_configuration(enum drm_conf conf); - #endif diff --git a/src/gallium/include/state_tracker/sw_driver.h b/src/gallium/include/state_tracker/sw_driver.h new file mode 100644 index 00000000000..0eb2b44d6fd --- /dev/null +++ b/src/gallium/include/state_tracker/sw_driver.h @@ -0,0 +1,21 @@ + +#ifndef _SW_DRIVER_H_ +#define _SW_DRIVER_H_ + +#include "pipe/p_compiler.h" + +struct pipe_screen; +struct sw_winsys; + +struct sw_driver_descriptor +{ + struct pipe_screen *(*create_screen)(struct sw_winsys *ws); + struct { + const char * const name; + struct sw_winsys *(*create_winsys)(); + } winsys[]; +}; + +extern struct sw_driver_descriptor swrast_driver_descriptor; + +#endif diff --git a/src/gallium/state_trackers/clover/Makefile.am b/src/gallium/state_trackers/clover/Makefile.am index fd0ccf88cc5..3c9421692fc 100644 --- a/src/gallium/state_trackers/clover/Makefile.am +++ b/src/gallium/state_trackers/clover/Makefile.am @@ -1,8 +1,6 @@ include Makefile.sources AM_CPPFLAGS = \ - $(GALLIUM_PIPE_LOADER_DEFINES) \ - -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src \ -I$(top_srcdir)/src/gallium/include \ diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index 6efff79c7f4..1be2f6413f4 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -41,7 +41,7 @@ namespace { device::device(clover::platform &platform, pipe_loader_device *ldev) : platform(platform), ldev(ldev) { - pipe = pipe_loader_create_screen(ldev, PIPE_SEARCH_DIR); + pipe = pipe_loader_create_screen(ldev); if (!pipe || !pipe->get_param(pipe, PIPE_CAP_COMPUTE)) { if (pipe) pipe->destroy(pipe); diff --git a/src/gallium/state_trackers/dri/Android.mk 
b/src/gallium/state_trackers/dri/Android.mk index 43f0de9b464..f0eb18dcacf 100644 --- a/src/gallium/state_trackers/dri/Android.mk +++ b/src/gallium/state_trackers/dri/Android.mk @@ -29,9 +29,6 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := $(common_SOURCES) -LOCAL_CFLAGS := \ - -DGALLIUM_STATIC_TARGETS=1 \ - LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa \ diff --git a/src/gallium/state_trackers/dri/Makefile.am b/src/gallium/state_trackers/dri/Makefile.am index 9f4deba0c1e..74bccaa6416 100644 --- a/src/gallium/state_trackers/dri/Makefile.am +++ b/src/gallium/state_trackers/dri/Makefile.am @@ -25,8 +25,6 @@ include Makefile.sources include $(top_srcdir)/src/gallium/Automake.inc AM_CPPFLAGS = \ - $(GALLIUM_PIPE_LOADER_DEFINES) \ - -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/mesa \ @@ -36,15 +34,10 @@ AM_CPPFLAGS = \ $(LIBDRM_CFLAGS) \ $(VISIBILITY_CFLAGS) -if HAVE_GALLIUM_STATIC_TARGETS -AM_CPPFLAGS += \ - -DGALLIUM_STATIC_TARGETS=1 - if HAVE_GALLIUM_SOFTPIPE AM_CPPFLAGS += \ -DGALLIUM_SOFTPIPE endif # HAVE_GALLIUM_SOFTPIPE -endif # HAVE_GALLIUM_STATIC_TARGETS noinst_LTLIBRARIES = libdri.la libdri_la_SOURCES = $(common_SOURCES) diff --git a/src/gallium/state_trackers/dri/SConscript b/src/gallium/state_trackers/dri/SConscript index 657300baf13..fa48fb8a0d7 100644 --- a/src/gallium/state_trackers/dri/SConscript +++ b/src/gallium/state_trackers/dri/SConscript @@ -15,10 +15,6 @@ env.Append(CPPPATH = [ xmlpool_options.dir.dir, # Dir to generated xmlpool/options.h ]) -env.Append(CPPDEFINES = [ - ('GALLIUM_STATIC_TARGETS', '1'), -]) - sources = env.ParseSourceList('Makefile.sources', 'common_SOURCES') # XXX: if HAVE_DRISW diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c index 019414b56fe..beb0866c83f 100644 --- a/src/gallium/state_trackers/dri/dri2.c +++ b/src/gallium/state_trackers/dri/dri2.c @@ -1444,8 +1444,8 @@ 
dri2_init_screen(__DRIscreen * sPriv) const __DRIconfig **configs; struct dri_screen *screen; struct pipe_screen *pscreen = NULL; - const struct drm_conf_ret *throttle_ret = NULL; - const struct drm_conf_ret *dmabuf_ret = NULL; + const struct drm_conf_ret *throttle_ret; + const struct drm_conf_ret *dmabuf_ret; screen = CALLOC_STRUCT(dri_screen); if (!screen) @@ -1457,19 +1457,14 @@ dri2_init_screen(__DRIscreen * sPriv) sPriv->driverPrivate = (void *)screen; -#if GALLIUM_STATIC_TARGETS - pscreen = dd_create_screen(screen->fd); + if (pipe_loader_drm_probe_fd(&screen->dev, dup(screen->fd))) + pscreen = pipe_loader_create_screen(screen->dev); - throttle_ret = dd_configuration(DRM_CONF_THROTTLE); - dmabuf_ret = dd_configuration(DRM_CONF_SHARE_FD); -#else - if (pipe_loader_drm_probe_fd(&screen->dev, screen->fd)) { - pscreen = pipe_loader_create_screen(screen->dev, PIPE_SEARCH_DIR); + if (!pscreen) + goto fail; - throttle_ret = pipe_loader_configuration(screen->dev, DRM_CONF_THROTTLE); - dmabuf_ret = pipe_loader_configuration(screen->dev, DRM_CONF_SHARE_FD); - } -#endif // GALLIUM_STATIC_TARGETS + throttle_ret = pipe_loader_configuration(screen->dev, DRM_CONF_THROTTLE); + dmabuf_ret = pipe_loader_configuration(screen->dev, DRM_CONF_SHARE_FD); if (throttle_ret && throttle_ret->val.val_int != -1) { screen->throttling_enabled = TRUE; @@ -1486,20 +1481,14 @@ dri2_init_screen(__DRIscreen * sPriv) } } - if (pscreen && pscreen->get_param(pscreen, PIPE_CAP_DEVICE_RESET_STATUS_QUERY)) { + if (pscreen->get_param(pscreen, PIPE_CAP_DEVICE_RESET_STATUS_QUERY)) { sPriv->extensions = dri_robust_screen_extensions; screen->has_reset_status_query = true; } else sPriv->extensions = dri_screen_extensions; - /* dri_init_screen_helper checks pscreen for us */ - -#if GALLIUM_STATIC_TARGETS - configs = dri_init_screen_helper(screen, pscreen, dd_driver_name()); -#else configs = dri_init_screen_helper(screen, pscreen, screen->dev->driver_name); -#endif // GALLIUM_STATIC_TARGETS if (!configs) goto 
fail; @@ -1511,10 +1500,8 @@ dri2_init_screen(__DRIscreen * sPriv) return configs; fail: dri_destroy_screen_helper(screen); -#if !GALLIUM_STATIC_TARGETS if (screen->dev) pipe_loader_release(&screen->dev, 1); -#endif // !GALLIUM_STATIC_TARGETS FREE(screen); return NULL; } @@ -1527,7 +1514,6 @@ fail: static const __DRIconfig ** dri_kms_init_screen(__DRIscreen * sPriv) { -#if GALLIUM_STATIC_TARGETS #if defined(GALLIUM_SOFTPIPE) const __DRIconfig **configs; struct dri_screen *screen; @@ -1543,7 +1529,11 @@ dri_kms_init_screen(__DRIscreen * sPriv) sPriv->driverPrivate = (void *)screen; - pscreen = kms_swrast_create_screen(screen->fd); + if (pipe_loader_sw_probe_kms(&screen->dev, dup(screen->fd))) + pscreen = pipe_loader_create_screen(screen->dev); + + if (!pscreen) + goto fail; if (drmGetCap(sPriv->fd, DRM_CAP_PRIME, &cap) == 0 && (cap & DRM_PRIME_CAP_IMPORT)) { @@ -1553,7 +1543,6 @@ dri_kms_init_screen(__DRIscreen * sPriv) sPriv->extensions = dri_screen_extensions; - /* dri_init_screen_helper checks pscreen for us */ configs = dri_init_screen_helper(screen, pscreen, "swrast"); if (!configs) goto fail; @@ -1566,9 +1555,10 @@ dri_kms_init_screen(__DRIscreen * sPriv) return configs; fail: dri_destroy_screen_helper(screen); + if (screen->dev) + pipe_loader_release(&screen->dev, 1); FREE(screen); #endif // GALLIUM_SOFTPIPE -#endif // GALLIUM_STATIC_TARGETS return NULL; } diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index c4c2d9c8fb1..2ac55c88926 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -390,9 +390,7 @@ dri_destroy_screen(__DRIscreen * sPriv) dri_destroy_screen_helper(screen); -#if !GALLIUM_STATIC_TARGETS pipe_loader_release(&screen->dev, 1); -#endif // !GALLIUM_STATIC_TARGETS free(screen); sPriv->driverPrivate = NULL; @@ -416,11 +414,6 @@ dri_init_screen_helper(struct dri_screen *screen, const char* driver_name) { screen->base.screen = pscreen; - if 
(!screen->base.screen) { - debug_printf("%s: failed to create pipe_screen\n", __FUNCTION__); - return NULL; - } - screen->base.get_egl_image = dri_get_egl_image; screen->base.get_param = dri_get_param; diff --git a/src/gallium/state_trackers/dri/drisw.c b/src/gallium/state_trackers/dri/drisw.c index 753c59d696a..b85a73c57d2 100644 --- a/src/gallium/state_trackers/dri/drisw.c +++ b/src/gallium/state_trackers/dri/drisw.c @@ -39,6 +39,7 @@ #include "util/u_inlines.h" #include "util/u_box.h" #include "pipe/p_context.h" +#include "pipe-loader/pipe_loader.h" #include "state_tracker/drisw_api.h" #include "state_tracker/st_context.h" @@ -382,7 +383,7 @@ drisw_init_screen(__DRIscreen * sPriv) { const __DRIconfig **configs; struct dri_screen *screen; - struct pipe_screen *pscreen; + struct pipe_screen *pscreen = NULL; screen = CALLOC_STRUCT(dri_screen); if (!screen) @@ -396,8 +397,11 @@ drisw_init_screen(__DRIscreen * sPriv) sPriv->driverPrivate = (void *)screen; sPriv->extensions = drisw_screen_extensions; - pscreen = drisw_create_screen(&drisw_lf); - /* dri_init_screen_helper checks pscreen for us */ + if (pipe_loader_sw_probe_dri(&screen->dev, &drisw_lf)) + pscreen = pipe_loader_create_screen(screen->dev); + + if (!pscreen) + goto fail; configs = dri_init_screen_helper(screen, pscreen, "swrast"); if (!configs) @@ -406,6 +410,8 @@ drisw_init_screen(__DRIscreen * sPriv) return configs; fail: dri_destroy_screen_helper(screen); + if (screen->dev) + pipe_loader_release(&screen->dev, 1); FREE(screen); return NULL; } diff --git a/src/gallium/state_trackers/omx/entrypoint.c b/src/gallium/state_trackers/omx/entrypoint.c index 7df90b16a84..da9ca104d93 100644 --- a/src/gallium/state_trackers/omx/entrypoint.c +++ b/src/gallium/state_trackers/omx/entrypoint.c @@ -33,6 +33,7 @@ #include <assert.h> #include <string.h> +#include <stdbool.h> #include <X11/Xlib.h> @@ -73,29 +74,30 @@ int omx_component_library_Setup(stLoaderComponentType **stComponents) struct vl_screen 
*omx_get_screen(void) { + static bool first_time = true; pipe_mutex_lock(omx_lock); - if (!omx_display) { - omx_render_node = debug_get_option("OMX_RENDER_NODE", NULL); - if (!omx_render_node) { - omx_display = XOpenDisplay(NULL); - if (!omx_display) - goto error; - } - } - if (!omx_screen) { + if (first_time) { + omx_render_node = debug_get_option("OMX_RENDER_NODE", NULL); + first_time = false; + } if (omx_render_node) { drm_fd = loader_open_device(omx_render_node); if (drm_fd < 0) goto error; + omx_screen = vl_drm_screen_create(drm_fd); if (!omx_screen) { close(drm_fd); goto error; } } else { - omx_screen = vl_screen_create(omx_display, 0); + omx_display = XOpenDisplay(NULL); + if (!omx_display) + goto error; + + omx_screen = vl_dri2_screen_create(omx_display, 0); if (!omx_screen) { XCloseDisplay(omx_display); goto error; @@ -117,16 +119,13 @@ void omx_put_screen(void) { pipe_mutex_lock(omx_lock); if ((--omx_usecount) == 0) { - if (!omx_render_node) { - vl_screen_destroy(omx_screen); - if (omx_display) - XCloseDisplay(omx_display); - } else { - close(drm_fd); - vl_drm_screen_destroy(omx_screen); - } + omx_screen->destroy(omx_screen); omx_screen = NULL; - omx_display = NULL; + + if (omx_render_node) + close(drm_fd); + else + XCloseDisplay(omx_display); } pipe_mutex_unlock(omx_lock); } diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c index 98c4104da48..f0051e5f6a5 100644 --- a/src/gallium/state_trackers/va/context.c +++ b/src/gallium/state_trackers/va/context.c @@ -102,7 +102,6 @@ PUBLIC VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP ctx) { vlVaDriver *drv; - int drm_fd; struct drm_state *drm_info; if (!ctx) @@ -119,26 +118,20 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx) return VA_STATUS_ERROR_UNIMPLEMENTED; case VA_DISPLAY_GLX: case VA_DISPLAY_X11: - drv->vscreen = vl_screen_create(ctx->native_dpy, ctx->x11_screen); + drv->vscreen = vl_dri2_screen_create(ctx->native_dpy, ctx->x11_screen); if (!drv->vscreen) goto 
error_screen; break; case VA_DISPLAY_DRM: case VA_DISPLAY_DRM_RENDERNODES: { drm_info = (struct drm_state *) ctx->drm_state; - if (!drm_info) { - FREE(drv); - return VA_STATUS_ERROR_INVALID_PARAMETER; - } - - drm_fd = drm_info->fd; - if (drm_fd < 0) { + if (!drm_info || drm_info->fd < 0) { FREE(drv); return VA_STATUS_ERROR_INVALID_PARAMETER; } - drv->vscreen = vl_drm_screen_create(drm_fd); + drv->vscreen = vl_drm_screen_create(drm_info->fd); if (!drv->vscreen) goto error_screen; } @@ -182,10 +175,7 @@ error_htab: drv->pipe->destroy(drv->pipe); error_pipe: - if (ctx->display_type == VA_DISPLAY_GLX || ctx->display_type == VA_DISPLAY_X11) - vl_screen_destroy(drv->vscreen); - else - vl_drm_screen_destroy(drv->vscreen); + drv->vscreen->destroy(drv->vscreen); error_screen: FREE(drv); @@ -322,10 +312,7 @@ vlVaTerminate(VADriverContextP ctx) vl_compositor_cleanup_state(&drv->cstate); vl_compositor_cleanup(&drv->compositor); drv->pipe->destroy(drv->pipe); - if (ctx->display_type == VA_DISPLAY_GLX || ctx->display_type == VA_DISPLAY_X11) - vl_screen_destroy(drv->vscreen); - else - vl_drm_screen_destroy(drv->vscreen); + drv->vscreen->destroy(drv->vscreen); handle_table_destroy(drv->htab); FREE(drv); diff --git a/src/gallium/state_trackers/va/picture.c b/src/gallium/state_trackers/va/picture.c index 5e7841a0521..a37a9b791db 100644 --- a/src/gallium/state_trackers/va/picture.c +++ b/src/gallium/state_trackers/va/picture.c @@ -763,7 +763,7 @@ handleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *context, v dst_rect.x1 = pipeline_param->output_region->x + pipeline_param->output_region->width; dst_rect.y1 = pipeline_param->output_region->y + pipeline_param->output_region->height; - dirty_area = vl_screen_get_dirty_area(drv->vscreen); + dirty_area = drv->vscreen->get_dirty_area(drv->vscreen); vl_compositor_clear_layers(&drv->cstate); vl_compositor_set_buffer_layer(&drv->cstate, &drv->compositor, 0, src_surface->buffer, &src_rect, NULL, VL_COMPOSITOR_WEAVE); diff 
--git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c index 589d6860b6a..c052c8f2284 100644 --- a/src/gallium/state_trackers/va/surface.c +++ b/src/gallium/state_trackers/va/surface.c @@ -229,6 +229,7 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface_id, void* draw, short s struct pipe_screen *screen; struct pipe_resource *tex; struct pipe_surface surf_templ, *surf_draw; + struct vl_screen *vscreen; struct u_rect src_rect, *dirty_area; struct u_rect dst_rect = {destx, destx + destw, desty, desty + desth}; VAStatus status; @@ -242,17 +243,18 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface_id, void* draw, short s return VA_STATUS_ERROR_INVALID_SURFACE; screen = drv->pipe->screen; + vscreen = drv->vscreen; if(surf->fence) { screen->fence_finish(screen, surf->fence, PIPE_TIMEOUT_INFINITE); screen->fence_reference(screen, &surf->fence, NULL); } - tex = vl_screen_texture_from_drawable(drv->vscreen, (Drawable)draw); + tex = vscreen->texture_from_drawable(vscreen, draw); if (!tex) return VA_STATUS_ERROR_INVALID_DISPLAY; - dirty_area = vl_screen_get_dirty_area(drv->vscreen); + dirty_area = vscreen->get_dirty_area(vscreen); memset(&surf_templ, 0, sizeof(surf_templ)); surf_templ.format = tex->format; @@ -276,11 +278,8 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface_id, void* draw, short s if (status) return status; - screen->flush_frontbuffer - ( - screen, tex, 0, 0, - vl_screen_get_private(drv->vscreen), NULL - ); + screen->flush_frontbuffer(screen, tex, 0, 0, + vscreen->get_private(vscreen), NULL); screen->fence_reference(screen, &surf->fence, NULL); drv->pipe->flush(drv->pipe, &surf->fence, 0); diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c index 31c95054f56..c70cc6e2752 100644 --- a/src/gallium/state_trackers/vdpau/device.c +++ b/src/gallium/state_trackers/vdpau/device.c @@ -63,7 +63,7 @@ vdp_imp_device_create_x11(Display *display, int screen, 
VdpDevice *device, pipe_reference_init(&dev->reference, 1); - dev->vscreen = vl_screen_create(display, screen); + dev->vscreen = vl_dri2_screen_create(display, screen); if (!dev->vscreen) { ret = VDP_STATUS_RESOURCES; goto no_vscreen; @@ -136,7 +136,7 @@ no_handle: no_resource: dev->context->destroy(dev->context); no_context: - vl_screen_destroy(dev->vscreen); + dev->vscreen->destroy(dev->vscreen); no_vscreen: FREE(dev); no_dev: @@ -227,7 +227,7 @@ vlVdpDeviceFree(vlVdpDevice *dev) vl_compositor_cleanup(&dev->compositor); pipe_sampler_view_reference(&dev->dummy_sv, NULL); dev->context->destroy(dev->context); - vl_screen_destroy(dev->vscreen); + dev->vscreen->destroy(dev->vscreen); FREE(dev); vlDestroyHTAB(); } diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c index e53303708b2..e7f387e6173 100644 --- a/src/gallium/state_trackers/vdpau/presentation.c +++ b/src/gallium/state_trackers/vdpau/presentation.c @@ -186,7 +186,8 @@ vlVdpPresentationQueueGetTime(VdpPresentationQueue presentation_queue, return VDP_STATUS_INVALID_HANDLE; pipe_mutex_lock(pq->device->mutex); - *current_time = vl_screen_get_timestamp(pq->device->vscreen, pq->drawable); + *current_time = pq->device->vscreen->get_timestamp(pq->device->vscreen, + (void *)pq->drawable); pipe_mutex_unlock(pq->device->mutex); return VDP_STATUS_OK; @@ -214,6 +215,7 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue, struct vl_compositor *compositor; struct vl_compositor_state *cstate; + struct vl_screen *vscreen; pq = vlGetDataHTAB(presentation_queue); if (!pq) @@ -226,15 +228,16 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue, pipe = pq->device->context; compositor = &pq->device->compositor; cstate = &pq->cstate; + vscreen = pq->device->vscreen; pipe_mutex_lock(pq->device->mutex); - tex = vl_screen_texture_from_drawable(pq->device->vscreen, pq->drawable); + tex = vscreen->texture_from_drawable(vscreen, (void 
*)pq->drawable); if (!tex) { pipe_mutex_unlock(pq->device->mutex); return VDP_STATUS_INVALID_HANDLE; } - dirty_area = vl_screen_get_dirty_area(pq->device->vscreen); + dirty_area = vscreen->get_dirty_area(vscreen); memset(&surf_templ, 0, sizeof(surf_templ)); surf_templ.format = tex->format; @@ -267,12 +270,9 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue, vl_compositor_render(cstate, compositor, surf_draw, dirty_area, true); } - vl_screen_set_next_timestamp(pq->device->vscreen, earliest_presentation_time); - pipe->screen->flush_frontbuffer - ( - pipe->screen, tex, 0, 0, - vl_screen_get_private(pq->device->vscreen), NULL - ); + vscreen->set_next_timestamp(vscreen, earliest_presentation_time); + pipe->screen->flush_frontbuffer(pipe->screen, tex, 0, 0, + vscreen->get_private(vscreen), NULL); pipe->screen->fence_reference(pipe->screen, &surf->fence, NULL); pipe->flush(pipe, &surf->fence, 0); diff --git a/src/gallium/state_trackers/xa/Makefile.am b/src/gallium/state_trackers/xa/Makefile.am index 5051e8246e3..968778f995c 100644 --- a/src/gallium/state_trackers/xa/Makefile.am +++ b/src/gallium/state_trackers/xa/Makefile.am @@ -28,15 +28,6 @@ AM_CFLAGS = \ $(GALLIUM_CFLAGS) \ $(VISIBILITY_CFLAGS) -AM_CPPFLAGS = \ - $(GALLIUM_PIPE_LOADER_DEFINES) \ - -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" - -if HAVE_GALLIUM_STATIC_TARGETS -AM_CPPFLAGS += \ - -DGALLIUM_STATIC_TARGETS=1 -endif - xa_includedir = $(includedir) xa_include_HEADERS = \ xa_composite.h \ diff --git a/src/gallium/state_trackers/xa/xa_tracker.c b/src/gallium/state_trackers/xa/xa_tracker.c index 4fdbdc96ae6..faa630c144b 100644 --- a/src/gallium/state_trackers/xa/xa_tracker.c +++ b/src/gallium/state_trackers/xa/xa_tracker.c @@ -152,21 +152,13 @@ xa_tracker_create(int drm_fd) struct xa_tracker *xa = calloc(1, sizeof(struct xa_tracker)); enum xa_surface_type stype; unsigned int num_formats; - int loader_fd; if (!xa) return NULL; -#if GALLIUM_STATIC_TARGETS - xa->screen = 
dd_create_screen(drm_fd); - (void) loader_fd; /* silence unused var warning */ -#else - loader_fd = dup(drm_fd); - if (loader_fd == -1) - return NULL; - if (pipe_loader_drm_probe_fd(&xa->dev, loader_fd)) - xa->screen = pipe_loader_create_screen(xa->dev, PIPE_SEARCH_DIR); -#endif + if (pipe_loader_drm_probe_fd(&xa->dev, dup(drm_fd))) + xa->screen = pipe_loader_create_screen(xa->dev); + if (!xa->screen) goto out_no_screen; @@ -214,10 +206,8 @@ xa_tracker_create(int drm_fd) out_no_pipe: xa->screen->destroy(xa->screen); out_no_screen: -#if !GALLIUM_STATIC_TARGETS if (xa->dev) pipe_loader_release(&xa->dev, 1); -#endif free(xa); return NULL; } @@ -228,9 +218,7 @@ xa_tracker_destroy(struct xa_tracker *xa) free(xa->supported_formats); xa_context_destroy(xa->default_ctx); xa->screen->destroy(xa->screen); -#if !GALLIUM_STATIC_TARGETS pipe_loader_release(&xa->dev, 1); -#endif free(xa); } diff --git a/src/gallium/state_trackers/xvmc/context.c b/src/gallium/state_trackers/xvmc/context.c index 4702b44d1f4..a6991ab8d61 100644 --- a/src/gallium/state_trackers/xvmc/context.c +++ b/src/gallium/state_trackers/xvmc/context.c @@ -229,7 +229,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id, return BadAlloc; /* TODO: Reuse screen if process creates another context */ - vscreen = vl_screen_create(dpy, scrn); + vscreen = vl_dri2_screen_create(dpy, scrn); if (!vscreen) { XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL screen.\n"); @@ -240,7 +240,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id, pipe = vscreen->pscreen->context_create(vscreen->pscreen, vscreen, 0); if (!pipe) { XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL context.\n"); - vl_screen_destroy(vscreen); + vscreen->destroy(vscreen); FREE(context_priv); return BadAlloc; } @@ -258,7 +258,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id, if (!context_priv->decoder) { XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL decoder.\n"); 
pipe->destroy(pipe); - vl_screen_destroy(vscreen); + vscreen->destroy(vscreen); FREE(context_priv); return BadAlloc; } @@ -267,7 +267,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id, XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL compositor.\n"); context_priv->decoder->destroy(context_priv->decoder); pipe->destroy(pipe); - vl_screen_destroy(vscreen); + vscreen->destroy(vscreen); FREE(context_priv); return BadAlloc; } @@ -277,7 +277,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id, vl_compositor_cleanup(&context_priv->compositor); context_priv->decoder->destroy(context_priv->decoder); pipe->destroy(pipe); - vl_screen_destroy(vscreen); + vscreen->destroy(vscreen); FREE(context_priv); return BadAlloc; } @@ -332,7 +332,7 @@ Status XvMCDestroyContext(Display *dpy, XvMCContext *context) vl_compositor_cleanup_state(&context_priv->cstate); vl_compositor_cleanup(&context_priv->compositor); context_priv->pipe->destroy(context_priv->pipe); - vl_screen_destroy(context_priv->vscreen); + context_priv->vscreen->destroy(context_priv->vscreen); FREE(context_priv); context->privData = NULL; diff --git a/src/gallium/state_trackers/xvmc/surface.c b/src/gallium/state_trackers/xvmc/surface.c index 15eae59ff6e..199712ba168 100644 --- a/src/gallium/state_trackers/xvmc/surface.c +++ b/src/gallium/state_trackers/xvmc/surface.c @@ -355,6 +355,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable, struct pipe_context *pipe; struct vl_compositor *compositor; struct vl_compositor_state *cstate; + struct vl_screen *vscreen; XvMCSurfacePrivate *surface_priv; XvMCContextPrivate *context_priv; @@ -386,9 +387,10 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable, pipe = context_priv->pipe; compositor = &context_priv->compositor; cstate = &context_priv->cstate; + vscreen = context_priv->vscreen; - tex = vl_screen_texture_from_drawable(context_priv->vscreen, drawable); - dirty_area = 
vl_screen_get_dirty_area(context_priv->vscreen); + tex = vscreen->texture_from_drawable(vscreen, (void *)drawable); + dirty_area = vscreen->get_dirty_area(vscreen); memset(&surf_templ, 0, sizeof(surf_templ)); surf_templ.format = tex->format; @@ -444,11 +446,8 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable, XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface); - pipe->screen->flush_frontbuffer - ( - pipe->screen, tex, 0, 0, - vl_screen_get_private(context_priv->vscreen), NULL - ); + pipe->screen->flush_frontbuffer(pipe->screen, tex, 0, 0, + vscreen->get_private(vscreen), NULL); if(dump_window == -1) { dump_window = debug_get_num_option("XVMC_DUMP", 0); diff --git a/src/gallium/targets/d3dadapter9/Makefile.am b/src/gallium/targets/d3dadapter9/Makefile.am index b5221472ef0..d1d9829b6c5 100644 --- a/src/gallium/targets/d3dadapter9/Makefile.am +++ b/src/gallium/targets/d3dadapter9/Makefile.am @@ -34,19 +34,6 @@ AM_CFLAGS = \ $(GALLIUM_TARGET_CFLAGS) \ $(VISIBILITY_CFLAGS) -if HAVE_GALLIUM_STATIC_TARGETS -AM_CPPFLAGS = \ - -DNINE_TARGET \ - -DGALLIUM_STATIC_TARGETS=1 - -else - -AM_CPPFLAGS = \ - -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \ - $(GALLIUM_PIPE_LOADER_DEFINES) - -endif - ninedir = $(D3D_DRIVER_INSTALL_DIR) nine_LTLIBRARIES = d3dadapter9.la @@ -78,7 +65,6 @@ d3dadapter9_la_LIBADD = \ $(top_builddir)/src/glsl/libnir.la \ $(top_builddir)/src/gallium/state_trackers/nine/libninetracker.la \ $(top_builddir)/src/util/libmesautil.la \ - $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \ $(EXPAT_LIBS) \ $(GALLIUM_COMMON_LIB_DEPS) @@ -87,7 +73,7 @@ EXTRA_DIST = d3dadapter9.sym TARGET_DRIVERS = TARGET_CPPFLAGS = -TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la +TARGET_LIB_DEPS = include $(top_srcdir)/src/gallium/drivers/i915/Automake.inc @@ -111,14 +97,16 @@ include $(top_srcdir)/src/gallium/drivers/llvmpipe/Automake.inc if HAVE_GALLIUM_STATIC_TARGETS 
d3dadapter9_la_CPPFLAGS = $(AM_CPPFLAGS) $(TARGET_CPPFLAGS) -d3dadapter9_la_LIBADD += $(TARGET_LIB_DEPS) \ +d3dadapter9_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ + $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ + $(TARGET_LIB_DEPS) \ $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON) else # HAVE_GALLIUM_STATIC_TARGETS d3dadapter9_la_LIBADD += \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la endif # HAVE_GALLIUM_STATIC_TARGETS diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c index fabc820f268..ad712db05eb 100644 --- a/src/gallium/targets/d3dadapter9/drm.c +++ b/src/gallium/targets/d3dadapter9/drm.c @@ -20,6 +20,7 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/* XXX: header order is slightly screwy here */ #include "loader.h" #include "adapter9.h" @@ -29,8 +30,7 @@ #include "pipe/p_screen.h" #include "pipe/p_state.h" -#include "target-helpers/inline_drm_helper.h" -#include "target-helpers/inline_sw_helper.h" +#include "target-helpers/drm_helper.h" #include "state_tracker/drm_driver.h" #include "d3dadapter/d3dadapter9.h" @@ -91,53 +91,15 @@ drm_destroy( struct d3dadapter9_context *ctx ) else if (ctx->hal) ctx->hal->destroy(ctx->hal); -#if !GALLIUM_STATIC_TARGETS if (drm->swdev) pipe_loader_release(&drm->swdev, 1); if (drm->dev) pipe_loader_release(&drm->dev, 1); -#endif close(drm->fd); FREE(ctx); } -/* read a DWORD in the form 0xnnnnnnnn, which is how sysfs pci id stuff is - * formatted. */ -static inline DWORD -read_file_dword( const char *name ) -{ - char buf[32]; - int fd, r; - - fd = open(name, O_RDONLY); - if (fd < 0) { - DBG("Unable to get PCI information from `%s'\n", name); - return 0; - } - - r = read(fd, buf, 32); - close(fd); - - return (r > 0) ? 
(DWORD)strtol(buf, NULL, 0) : 0; -} - -/* sysfs doesn't expose the revision as its own file, so this function grabs a - * dword at an offset in the raw PCI header. The reason this isn't used for all - * data is that the kernel will make corrections but not expose them in the raw - * header bytes. */ -static inline DWORD -read_config_dword( int fd, - unsigned offset ) -{ - DWORD r = 0; - - if (lseek(fd, offset, SEEK_SET) != offset) { return 0; } - if (read(fd, &r, 4) != 4) { return 0; } - - return r; -} - static inline void get_bus_info( int fd, DWORD *vendorid, @@ -215,26 +177,16 @@ drm_create_adapter( int fd, driOptionCache userInitOptions; int throttling_value_user = -2; -#if !GALLIUM_STATIC_TARGETS - const char *paths[] = { - getenv("D3D9_DRIVERS_PATH"), - getenv("D3D9_DRIVERS_DIR"), - PIPE_SEARCH_DIR - }; -#endif - if (!ctx) { return E_OUTOFMEMORY; } ctx->base.destroy = drm_destroy; + /* Although the fd is provided from external source, mesa/nine + * takes ownership of it. */ fd = loader_get_user_preferred_fd(fd, &different_device); ctx->fd = fd; ctx->base.linear_framebuffer = !!different_device; -#if GALLIUM_STATIC_TARGETS - ctx->base.hal = dd_create_screen(fd); -#else - /* use pipe-loader to dlopen appropriate drm driver */ if (!pipe_loader_drm_probe_fd(&ctx->dev, fd)) { ERR("Failed to probe drm fd %d.\n", fd); FREE(ctx); @@ -242,26 +194,15 @@ drm_create_adapter( int fd, return D3DERR_DRIVERINTERNALERROR; } - /* use pipe-loader to create a drm screen (hal) */ - ctx->base.hal = NULL; - for (i = 0; !ctx->base.hal && i < Elements(paths); ++i) { - if (!paths[i]) { continue; } - ctx->base.hal = pipe_loader_create_screen(ctx->dev, paths[i]); - } -#endif + ctx->base.hal = pipe_loader_create_screen(ctx->dev); if (!ctx->base.hal) { ERR("Unable to load requested driver.\n"); drm_destroy(&ctx->base); return D3DERR_DRIVERINTERNALERROR; } -#if GALLIUM_STATIC_TARGETS - dmabuf_ret = dd_configuration(DRM_CONF_SHARE_FD); - throttle_ret = dd_configuration(DRM_CONF_THROTTLE); 
-#else dmabuf_ret = pipe_loader_configuration(ctx->dev, DRM_CONF_SHARE_FD); throttle_ret = pipe_loader_configuration(ctx->dev, DRM_CONF_THROTTLE); -#endif // GALLIUM_STATIC_TARGETS if (!dmabuf_ret || !dmabuf_ret->val.val_bool) { ERR("The driver is not capable of dma-buf sharing." "Abandon to load nine state tracker\n"); @@ -308,18 +249,10 @@ drm_create_adapter( int fd, driDestroyOptionCache(&userInitOptions); driDestroyOptionInfo(&defaultInitOptions); -#if GALLIUM_STATIC_TARGETS - ctx->base.ref = ninesw_create_screen(ctx->base.hal); -#else /* wrap it to create a software screen that can share resources */ - if (pipe_loader_sw_probe_wrapped(&ctx->swdev, ctx->base.hal)) { - ctx->base.ref = NULL; - for (i = 0; !ctx->base.ref && i < Elements(paths); ++i) { - if (!paths[i]) { continue; } - ctx->base.ref = pipe_loader_create_screen(ctx->swdev, paths[i]); - } - } -#endif + if (pipe_loader_sw_probe_wrapped(&ctx->swdev, ctx->base.hal)) + ctx->base.ref = pipe_loader_create_screen(ctx->swdev); + if (!ctx->base.ref) { ERR("Couldn't wrap drm screen to swrast screen. 
Software devices " "will be unavailable.\n"); diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk index a33d7f83671..2d9610ee9ab 100644 --- a/src/gallium/targets/dri/Android.mk +++ b/src/gallium/targets/dri/Android.mk @@ -35,7 +35,7 @@ endif LOCAL_SRC_FILES := target.c -LOCAL_CFLAGS := -DDRI_TARGET +LOCAL_CFLAGS := LOCAL_SHARED_LIBRARIES := \ libdl \ @@ -108,6 +108,7 @@ LOCAL_STATIC_LIBRARIES := \ libmesa_dri_common \ libmesa_megadriver_stub \ libmesa_gallium \ + libmesa_pipe_loader \ libmesa_util \ libmesa_loader \ diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am index 95efdd4451c..2666524fbfe 100644 --- a/src/gallium/targets/dri/Makefile.am +++ b/src/gallium/targets/dri/Makefile.am @@ -10,7 +10,6 @@ AM_CFLAGS = \ AM_CPPFLAGS = \ $(DEFINES) \ - -DDRI_TARGET \ -DGALLIUM_DDEBUG \ -DGALLIUM_NOOP \ -DGALLIUM_RBUG \ @@ -65,7 +64,7 @@ EXTRA_DIST = \ TARGET_DRIVERS = TARGET_CPPFLAGS = -TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la +TARGET_LIB_DEPS = include $(top_srcdir)/src/gallium/drivers/i915/Automake.inc @@ -92,14 +91,16 @@ if HAVE_GALLIUM_STATIC_TARGETS gallium_dri_la_SOURCES += target.c gallium_dri_la_CPPFLAGS = $(AM_CPPFLAGS) $(TARGET_CPPFLAGS) -gallium_dri_la_LIBADD += $(TARGET_LIB_DEPS) \ +gallium_dri_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ + $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ + $(TARGET_LIB_DEPS) \ $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON) else # HAVE_GALLIUM_STATIC_TARGETS gallium_dri_la_LIBADD += \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la endif # HAVE_GALLIUM_STATIC_TARGETS diff --git a/src/gallium/targets/dri/SConscript b/src/gallium/targets/dri/SConscript index 2fb0da09200..b4516598675 100644 --- a/src/gallium/targets/dri/SConscript +++ 
b/src/gallium/targets/dri/SConscript @@ -30,7 +30,6 @@ env.PkgUseModules('DRM') env.Append(CPPDEFINES = [ 'GALLIUM_VMWGFX', 'GALLIUM_SOFTPIPE', - 'DRI_TARGET', ]) env.Prepend(LIBS = [ @@ -39,6 +38,7 @@ env.Prepend(LIBS = [ svga, ws_dri, softpipe, + pipe_loader, libloader, mesautil, mesa, diff --git a/src/gallium/targets/dri/target.c b/src/gallium/targets/dri/target.c index 32a11ef6281..d6fbd01b88f 100644 --- a/src/gallium/targets/dri/target.c +++ b/src/gallium/targets/dri/target.c @@ -1,2 +1,163 @@ -#include "target-helpers/inline_drm_helper.h" -#include "target-helpers/inline_sw_helper.h" +#include "target-helpers/drm_helper.h" + +#include "dri_screen.h" + +#if defined(GALLIUM_SOFTPIPE) + +const __DRIextension **__driDriverGetExtensions_swrast(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_swrast(void) +{ + globalDriverAPI = &galliumsw_driver_api; + return galliumsw_driver_extensions; +} + +#if defined(HAVE_LIBDRM) + +const __DRIextension **__driDriverGetExtensions_kms_swrast(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_kms_swrast(void) +{ + globalDriverAPI = &dri_kms_driver_api; + return galliumdrm_driver_extensions; +} + +#endif +#endif + +#if defined(GALLIUM_I915) + +const __DRIextension **__driDriverGetExtensions_i915(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_i915(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_ILO) + +const __DRIextension **__driDriverGetExtensions_i965(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_NOUVEAU) + +const __DRIextension **__driDriverGetExtensions_nouveau(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_nouveau(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if 
defined(GALLIUM_R300) + +const __DRIextension **__driDriverGetExtensions_r300(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_r300(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_R600) + +const __DRIextension **__driDriverGetExtensions_r600(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_r600(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_RADEONSI) + +const __DRIextension **__driDriverGetExtensions_radeonsi(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_radeonsi(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_VMWGFX) + +const __DRIextension **__driDriverGetExtensions_vmwgfx(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_vmwgfx(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_FREEDRENO) + +const __DRIextension **__driDriverGetExtensions_msm(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_msm(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} + +const __DRIextension **__driDriverGetExtensions_kgsl(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_kgsl(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_VIRGL) + +const __DRIextension **__driDriverGetExtensions_virtio_gpu(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_virtio_gpu(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_VC4) + +const __DRIextension **__driDriverGetExtensions_vc4(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_vc4(void) +{ + globalDriverAPI = 
&galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} + +#if defined(USE_VC4_SIMULATOR) +const __DRIextension **__driDriverGetExtensions_i965(void); + +/** + * When building using the simulator (on x86), we advertise ourselves as the + * i965 driver so that you can just make a directory with a link from + * i965_dri.so to the built vc4_dri.so, and point LIBGL_DRIVERS_PATH to that + * on your i965-using host to run the driver under simulation. + * + * This is, of course, incompatible with building with the ilo driver, but you + * shouldn't be building that anyway. + */ +PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif +#endif diff --git a/src/gallium/targets/omx/Makefile.am b/src/gallium/targets/omx/Makefile.am index a4dff487dd8..3bdb9eb7e61 100644 --- a/src/gallium/targets/omx/Makefile.am +++ b/src/gallium/targets/omx/Makefile.am @@ -40,7 +40,7 @@ if HAVE_GALLIUM_STATIC_TARGETS TARGET_DRIVERS = TARGET_CPPFLAGS = -TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la +TARGET_LIB_DEPS = include $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc @@ -50,14 +50,16 @@ include $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc libomx_mesa_la_SOURCES += target.c libomx_mesa_la_CPPFLAGS = $(TARGET_CPPFLAGS) -libomx_mesa_la_LIBADD += $(TARGET_LIB_DEPS) \ +libomx_mesa_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ + $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ + $(TARGET_LIB_DEPS) \ $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON) else # HAVE_GALLIUM_STATIC_TARGETS libomx_mesa_la_LIBADD += \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la endif # HAVE_GALLIUM_STATIC_TARGETS diff --git a/src/gallium/targets/omx/target.c b/src/gallium/targets/omx/target.c index 
fde4a4a7dcf..42b1346d341 100644 --- a/src/gallium/targets/omx/target.c +++ b/src/gallium/targets/omx/target.c @@ -1 +1 @@ -#include "target-helpers/inline_drm_helper.h" +#include "target-helpers/drm_helper.h" diff --git a/src/gallium/targets/opencl/Makefile.am b/src/gallium/targets/opencl/Makefile.am index c78b26832ff..3cb29766724 100644 --- a/src/gallium/targets/opencl/Makefile.am +++ b/src/gallium/targets/opencl/Makefile.am @@ -15,11 +15,10 @@ lib@OPENCL_LIBNAME@_la_LDFLAGS += \ endif lib@OPENCL_LIBNAME@_la_LIBADD = \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la \ $(top_builddir)/src/gallium/state_trackers/clover/libclover.la \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ $(top_builddir)/src/util/libmesautil.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ $(ELF_LIB) \ -ldl \ -lclangCodeGen \ diff --git a/src/gallium/targets/pipe-loader/Makefile.am b/src/gallium/targets/pipe-loader/Makefile.am index 4f25b4f6073..4bc3b55f26b 100644 --- a/src/gallium/targets/pipe-loader/Makefile.am +++ b/src/gallium/targets/pipe-loader/Makefile.am @@ -27,6 +27,7 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src/gallium/drivers \ -I$(top_srcdir)/src/gallium/winsys \ + $(GALLIUM_PIPE_LOADER_DEFINES) \ $(LIBDRM_CFLAGS) \ $(VISIBILITY_CFLAGS) \ -DGALLIUM_RBUG \ @@ -208,6 +209,10 @@ AM_CPPFLAGS += -DGALLIUM_LLVMPIPE pipe_swrast_la_LIBADD += \ $(top_builddir)/src/gallium/drivers/llvmpipe/libllvmpipe.la endif + +pipe_swrast_la_LIBADD += \ + $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + endif EXTRA_DIST = pipe.sym diff --git a/src/gallium/targets/pipe-loader/pipe.sym b/src/gallium/targets/pipe-loader/pipe.sym index 19b1d77b040..b2fa619f7de 100644 --- a/src/gallium/targets/pipe-loader/pipe.sym +++ b/src/gallium/targets/pipe-loader/pipe.sym @@ -1,7 +1,7 @@ { global: driver_descriptor; - swrast_create_screen; + swrast_driver_descriptor; local: *; }; diff --git 
a/src/gallium/targets/pipe-loader/pipe_swrast.c b/src/gallium/targets/pipe-loader/pipe_swrast.c index f7f354acf3f..cf617f37e20 100644 --- a/src/gallium/targets/pipe-loader/pipe_swrast.c +++ b/src/gallium/targets/pipe-loader/pipe_swrast.c @@ -1,7 +1,11 @@ #include "target-helpers/inline_sw_helper.h" #include "target-helpers/inline_debug_helper.h" -#include "state_tracker/drm_driver.h" +#include "state_tracker/sw_driver.h" +#include "sw/dri/dri_sw_winsys.h" +#include "sw/kms-dri/kms_dri_sw_winsys.h" +#include "sw/null/null_sw_winsys.h" +#include "sw/wrapper/wrapper_sw_winsys.h" PUBLIC struct pipe_screen * swrast_create_screen(struct sw_winsys *ws); @@ -17,3 +21,31 @@ swrast_create_screen(struct sw_winsys *ws) return screen; } + +PUBLIC +struct sw_driver_descriptor swrast_driver_descriptor = { + .create_screen = swrast_create_screen, + .winsys = { +#ifdef HAVE_PIPE_LOADER_DRI + { + .name = "dri", + .create_winsys = dri_create_sw_winsys, + }, +#endif +#ifdef HAVE_PIPE_LOADER_KMS + { + .name = "kms_dri", + .create_winsys = kms_dri_create_winsys, + }, +#endif + { + .name = "null", + .create_winsys = null_sw_create, + }, + { + .name = "wrapped", + .create_winsys = wrapper_sw_winsys_wrap_pipe_screen, + }, + { 0 }, + } +}; diff --git a/src/gallium/targets/va/Makefile.am b/src/gallium/targets/va/Makefile.am index 9613f041b58..733e7acb455 100644 --- a/src/gallium/targets/va/Makefile.am +++ b/src/gallium/targets/va/Makefile.am @@ -40,21 +40,23 @@ if HAVE_GALLIUM_STATIC_TARGETS TARGET_DRIVERS = TARGET_CPPFLAGS = -TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la +TARGET_LIB_DEPS = include $(top_srcdir)/src/gallium/drivers/r600/Automake.inc include $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc gallium_drv_video_la_SOURCES += target.c gallium_drv_video_la_CPPFLAGS = $(TARGET_CPPFLAGS) -gallium_drv_video_la_LIBADD += $(TARGET_LIB_DEPS) \ +gallium_drv_video_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ + 
$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ + $(TARGET_LIB_DEPS) \ $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON) else # HAVE_GALLIUM_STATIC_TARGETS gallium_drv_video_la_LIBADD += \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la endif # HAVE_GALLIUM_STATIC_TARGETS diff --git a/src/gallium/targets/va/target.c b/src/gallium/targets/va/target.c index fde4a4a7dcf..42b1346d341 100644 --- a/src/gallium/targets/va/target.c +++ b/src/gallium/targets/va/target.c @@ -1 +1 @@ -#include "target-helpers/inline_drm_helper.h" +#include "target-helpers/drm_helper.h" diff --git a/src/gallium/targets/vdpau/Makefile.am b/src/gallium/targets/vdpau/Makefile.am index 7eb62c1cc78..d388f8b5014 100644 --- a/src/gallium/targets/vdpau/Makefile.am +++ b/src/gallium/targets/vdpau/Makefile.am @@ -47,7 +47,7 @@ EXTRA_DIST = \ TARGET_DRIVERS = TARGET_CPPFLAGS = -TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la +TARGET_LIB_DEPS = include $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc @@ -59,14 +59,16 @@ if HAVE_GALLIUM_STATIC_TARGETS libvdpau_gallium_la_SOURCES += target.c libvdpau_gallium_la_CPPFLAGS = $(TARGET_CPPFLAGS) -libvdpau_gallium_la_LIBADD += $(TARGET_LIB_DEPS) \ +libvdpau_gallium_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ + $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ + $(TARGET_LIB_DEPS) \ $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON) else # HAVE_GALLIUM_STATIC_TARGETS libvdpau_gallium_la_LIBADD += \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la endif # HAVE_GALLIUM_STATIC_TARGETS diff --git a/src/gallium/targets/vdpau/target.c b/src/gallium/targets/vdpau/target.c index fde4a4a7dcf..42b1346d341 100644 --- a/src/gallium/targets/vdpau/target.c +++ 
b/src/gallium/targets/vdpau/target.c @@ -1 +1 @@ -#include "target-helpers/inline_drm_helper.h" +#include "target-helpers/drm_helper.h" diff --git a/src/gallium/targets/xa/Makefile.am b/src/gallium/targets/xa/Makefile.am index 02c42c665ed..a63fd6903a4 100644 --- a/src/gallium/targets/xa/Makefile.am +++ b/src/gallium/targets/xa/Makefile.am @@ -60,7 +60,7 @@ if HAVE_GALLIUM_STATIC_TARGETS TARGET_DRIVERS = TARGET_CPPFLAGS = -TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la +TARGET_LIB_DEPS = include $(top_srcdir)/src/gallium/drivers/i915/Automake.inc @@ -74,13 +74,15 @@ include $(top_srcdir)/src/gallium/drivers/freedreno/Automake.inc libxatracker_la_SOURCES += target.c libxatracker_la_CPPFLAGS = $(TARGET_CPPFLAGS) -libxatracker_la_LIBADD += $(TARGET_LIB_DEPS) +libxatracker_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ + $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ + $(TARGET_LIB_DEPS) else # HAVE_GALLIUM_STATIC_TARGETS libxatracker_la_LIBADD += \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la endif # HAVE_GALLIUM_STATIC_TARGETS diff --git a/src/gallium/targets/xa/target.c b/src/gallium/targets/xa/target.c index fde4a4a7dcf..42b1346d341 100644 --- a/src/gallium/targets/xa/target.c +++ b/src/gallium/targets/xa/target.c @@ -1 +1 @@ -#include "target-helpers/inline_drm_helper.h" +#include "target-helpers/drm_helper.h" diff --git a/src/gallium/targets/xvmc/Makefile.am b/src/gallium/targets/xvmc/Makefile.am index b3285890822..fdc5f4b7318 100644 --- a/src/gallium/targets/xvmc/Makefile.am +++ b/src/gallium/targets/xvmc/Makefile.am @@ -38,7 +38,7 @@ EXTRA_DIST = xvmc.sym TARGET_DRIVERS = TARGET_CPPFLAGS = -TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la +TARGET_LIB_DEPS = include $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc @@ -48,13 +48,15 @@ if 
HAVE_GALLIUM_STATIC_TARGETS libXvMCgallium_la_SOURCES += target.c libXvMCgallium_la_CPPFLAGS = $(TARGET_CPPFLAGS) -libXvMCgallium_la_LIBADD += $(TARGET_LIB_DEPS) \ +libXvMCgallium_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ + $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ + $(TARGET_LIB_DEPS) \ $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON) else # HAVE_GALLIUM_STATIC_TARGETS libXvMCgallium_la_LIBADD += \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la endif # HAVE_GALLIUM_STATIC_TARGETS diff --git a/src/gallium/targets/xvmc/target.c b/src/gallium/targets/xvmc/target.c index fde4a4a7dcf..42b1346d341 100644 --- a/src/gallium/targets/xvmc/target.c +++ b/src/gallium/targets/xvmc/target.c @@ -1 +1 @@ -#include "target-helpers/inline_drm_helper.h" +#include "target-helpers/drm_helper.h" diff --git a/src/gallium/tests/trivial/Makefile.am b/src/gallium/tests/trivial/Makefile.am index 56b7f3ffc66..585fb699e6c 100644 --- a/src/gallium/tests/trivial/Makefile.am +++ b/src/gallium/tests/trivial/Makefile.am @@ -5,17 +5,10 @@ PIPE_SRC_DIR = $(top_builddir)/src/gallium/targets/pipe-loader AM_CFLAGS = \ $(GALLIUM_CFLAGS) -AM_CPPFLAGS = \ - -I$(top_srcdir)/src/gallium/drivers \ - -I$(top_srcdir)/src/gallium/winsys \ - -DPIPE_SEARCH_DIR=\"$(PIPE_SRC_DIR)/.libs\" \ - $(GALLIUM_PIPE_LOADER_DEFINES) - LDADD = \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ $(top_builddir)/src/util/libmesautil.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ $(GALLIUM_COMMON_LIB_DEPS) noinst_PROGRAMS = compute tri quad-tex diff --git a/src/gallium/tests/trivial/compute.c b/src/gallium/tests/trivial/compute.c index b344f78b25c..bcdfb11c4f1 100644 --- 
a/src/gallium/tests/trivial/compute.c +++ b/src/gallium/tests/trivial/compute.c @@ -74,7 +74,7 @@ static void init_ctx(struct context *ctx) ret = pipe_loader_probe(&ctx->dev, 1); assert(ret); - ctx->screen = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR); + ctx->screen = pipe_loader_create_screen(ctx->dev); assert(ctx->screen); ctx->pipe = ctx->screen->context_create(ctx->screen, NULL, 0); diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c index f66f63043da..4c5a9200a52 100644 --- a/src/gallium/tests/trivial/quad-tex.c +++ b/src/gallium/tests/trivial/quad-tex.c @@ -96,7 +96,7 @@ static void init_prog(struct program *p) assert(ret); /* init a pipe screen */ - p->screen = pipe_loader_create_screen(p->dev, PIPE_SEARCH_DIR); + p->screen = pipe_loader_create_screen(p->dev); assert(p->screen); /* create the pipe driver context and cso context */ diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c index a555200842e..c71a63f44e5 100644 --- a/src/gallium/tests/trivial/tri.c +++ b/src/gallium/tests/trivial/tri.c @@ -91,7 +91,7 @@ static void init_prog(struct program *p) assert(ret); /* init a pipe screen */ - p->screen = pipe_loader_create_screen(p->dev, PIPE_SEARCH_DIR); + p->screen = pipe_loader_create_screen(p->dev); assert(p->screen); /* create the pipe driver context and cso context */ |