diff options
author | Jason Ekstrand <[email protected]> | 2016-02-05 14:21:13 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2016-02-05 14:21:13 -0800 |
commit | 9645b8eb1f1b79e537ad8ddb683507df7bc9da58 (patch) | |
tree | 8e554a43a136b5f7951ff8734d42deb5e81c262b | |
parent | 3eebf3686be3de10cbeda8acd884e82df3e1438a (diff) | |
parent | 41875ac4edd8c884225c44c0840bd20291b410ca (diff) |
Merge branch mesa-public/master into vulkan
70 files changed, 1185 insertions, 311 deletions
diff --git a/Android.mk b/Android.mk index ed160fb3d0e..1d765590010 100644 --- a/Android.mk +++ b/Android.mk @@ -24,7 +24,7 @@ # BOARD_GPU_DRIVERS should be defined. The valid values are # # classic drivers: i915 i965 -# gallium drivers: swrast freedreno i915g ilo nouveau r300g r600g radeonsi vc4 vmwgfx +# gallium drivers: swrast freedreno i915g ilo nouveau r300g r600g radeonsi vc4 virgl vmwgfx # # The main target is libGLES_mesa. For each classic driver enabled, a DRI # module will also be built. DRI modules will be loaded by libGLES_mesa. @@ -46,7 +46,7 @@ MESA_COMMON_MK := $(MESA_TOP)/Android.common.mk MESA_PYTHON2 := python classic_drivers := i915 i965 -gallium_drivers := swrast freedreno i915g ilo nouveau r300g r600g radeonsi vmwgfx vc4 +gallium_drivers := swrast freedreno i915g ilo nouveau r300g r600g radeonsi vmwgfx vc4 virgl MESA_GPU_DRIVERS := $(strip $(BOARD_GPU_DRIVERS)) diff --git a/docs/index.html b/docs/index.html index 4c6b2763cad..2b1e64673ac 100644 --- a/docs/index.html +++ b/docs/index.html @@ -16,7 +16,17 @@ <h1>News</h1> -<h2>January 13, 2015</h2> +<h2>January 22, 2016</h2> +<p> +<a href="relnotes/11.0.9.html">Mesa 11.0.9</a> is released. +This is a bug-fix release. +<br> +NOTE: It is anticipated that 11.0.9 will be the final release in the 11.0 +series. Users of 11.0 are encouraged to migrate to the 11.1 series in order +to obtain future fixes. +</p> + +<h2>January 13, 2016</h2> <p> <a href="relnotes/11.1.1.html">Mesa 11.1.1</a> is released. This is a bug-fix release. diff --git a/docs/relnotes.html b/docs/relnotes.html index 6ae05b61c4a..2f527a428f7 100644 --- a/docs/relnotes.html +++ b/docs/relnotes.html @@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release. 
</p> <ul> +<li><a href="relnotes/11.0.9.html">11.0.9 release notes</a> <li><a href="relnotes/11.1.1.html">11.1.1 release notes</a> <li><a href="relnotes/11.0.8.html">11.0.8 release notes</a> <li><a href="relnotes/11.1.0.html">11.1.0 release notes</a> diff --git a/docs/relnotes/11.0.9.html b/docs/relnotes/11.0.9.html new file mode 100644 index 00000000000..3bfb52b9d82 --- /dev/null +++ b/docs/relnotes/11.0.9.html @@ -0,0 +1,127 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html lang="en"> +<head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <title>Mesa Release Notes</title> + <link rel="stylesheet" type="text/css" href="../mesa.css"> +</head> +<body> + +<div class="header"> + <h1>The Mesa 3D Graphics Library</h1> +</div> + +<iframe src="../contents.html"></iframe> +<div class="content"> + +<h1>Mesa 11.0.9 Release Notes / January 22, 2016</h1> + +<p> +Mesa 11.0.9 is a bug fix release which fixes bugs found since the 11.0.8 release. +</p> +<p> +Mesa 11.0.9 implements the OpenGL 4.1 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.1. OpenGL +4.1 is <strong>only</strong> available if requested at context creation +because compatibility contexts are not supported. 
+</p> + + +<h2>SHA256 checksums</h2> +<pre> +1597c2e983f476f98efdd6cd58b5298896d18479ff542bdeff28b98b129ede05 mesa-11.0.9.tar.gz +a1262ff1c66a16ccf341186cf0e57b306b8589eb2cc5ce92ffb6788ab01d2b01 mesa-11.0.9.tar.xz +</pre> + + +<h2>New features</h2> +<p>None</p> + +<h2>Bug fixes</h2> + +<p>This list is likely incomplete.</p> + +<ul> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91596">Bug 91596</a> - EGL_KHR_gl_colorspace (v2) causes problem with Android-x86 GUI</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92229">Bug 92229</a> - [APITRACE] SOMA have serious graphical errors</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93257">Bug 93257</a> - [SKL, bisected] ASTC dEQP tests segfault</li> + +</ul> + + +<h2>Changes</h2> + +<p>Emil Velikov (6):</p> +<ul> + <li>docs: add sha256 checksums for 11.0.8</li> + <li>cherry-ignore: add patch already in branch</li> + <li>cherry-ignore: add the dri3 glx null check patch</li> + <li>i915: correctly parse/set the context flags</li> + <li>egl/dri2: expose srgb configs when KHR_gl_colorspace is available</li> + <li>Update version to 11.0.9</li> +</ul> + +<p>Grazvydas Ignotas (1):</p> +<ul> + <li>r600: fix constant buffer size programming</li> +</ul> + +<p>Ilia Mirkin (5):</p> +<ul> + <li>nvc0: don't forget to reset VTX_TMP bufctx slot after blit completion</li> + <li>nv50/ir: float(s32 & 0xff) = float(u8), not s8</li> + <li>nv50,nvc0: make sure there's pushbuf space and that we ref the bo early</li> + <li>nv50,nvc0: fix crash when increasing bsp bo size for h264</li> + <li>nvc0: scale up inter_bo size so that it's 16M for a 4K video</li> +</ul> + +<p>Kenneth Graunke (2):</p> +<ul> + <li>ralloc: Fix ralloc_adopt() to the old context's last child's parent.</li> + <li>nvc0: Set winding order regardless of domain.</li> +</ul> + +<p>Marek Olšák (1):</p> +<ul> + <li>radeonsi: don't miss changes to SPI_TMPRING_SIZE</li> +</ul> + +<p>Miklós Máté (1):</p> +<ul> + <li>mesa: Don't leak ATIfs 
instructions in DeleteFragmentShader</li> +</ul> + +<p>Neil Roberts (1):</p> +<ul> + <li>i965: Fix crash when calling glViewport with no surface bound</li> +</ul> + +<p>Nicolai Hähnle (6):</p> +<ul> + <li>gallium/radeon: only dispose locally created target machine in radeon_llvm_compile</li> + <li>mesa/bufferobj: make _mesa_delete_buffer_object externally accessible</li> + <li>st/mesa: use _mesa_delete_buffer_object</li> + <li>radeon: use _mesa_delete_buffer_object</li> + <li>i915: use _mesa_delete_buffer_object</li> + <li>i965: use _mesa_delete_buffer_object</li> +</ul> + +<p>Oded Gabbay (1):</p> +<ul> + <li>llvmpipe: use vpkswss when dst is signed</li> +</ul> + +<p>Rob Herring (1):</p> +<ul> + <li>freedreno/ir3: fix 32-bit builds with pointer-to-int-cast error enabled</li> +</ul> + + +</div> +</body> +</html> diff --git a/include/pci_ids/virtio_gpu_pci_ids.h b/include/pci_ids/virtio_gpu_pci_ids.h new file mode 100644 index 00000000000..2e6ecaf1987 --- /dev/null +++ b/include/pci_ids/virtio_gpu_pci_ids.h @@ -0,0 +1 @@ +CHIPSET(0x0010, VIRTGL, VIRTGL) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index d34b16119e2..8f50f0ce573 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -235,6 +235,8 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, case __DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE: srgb = value != 0; + if (!disp->Extensions.KHR_gl_colorspace && srgb) + return NULL; break; default: diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk index b406d4a5480..749be7dfeb9 100644 --- a/src/gallium/Android.mk +++ b/src/gallium/Android.mk @@ -83,6 +83,11 @@ ifneq ($(filter vc4, $(MESA_GPU_DRIVERS)),) SUBDIRS += winsys/vc4/drm drivers/vc4 endif +# virgl +ifneq ($(filter virgl, $(MESA_GPU_DRIVERS)),) +SUBDIRS += winsys/virgl/drm drivers/virgl +endif + # vmwgfx ifneq ($(filter vmwgfx, $(MESA_GPU_DRIVERS)),) SUBDIRS += winsys/svga/drm drivers/svga diff --git 
a/src/gallium/drivers/ddebug/dd_draw.c b/src/gallium/drivers/ddebug/dd_draw.c index 0d7ee9a1686..45e4e10d500 100644 --- a/src/gallium/drivers/ddebug/dd_draw.c +++ b/src/gallium/drivers/ddebug/dd_draw.c @@ -88,8 +88,9 @@ struct dd_call static FILE * dd_get_file_stream(struct dd_context *dctx) { + struct dd_screen *dscreen = dd_screen(dctx->base.screen); struct pipe_screen *screen = dctx->pipe->screen; - FILE *f = dd_get_debug_file(); + FILE *f = dd_get_debug_file(dscreen->verbose); if (!f) return NULL; @@ -602,6 +603,7 @@ static void dd_after_draw(struct dd_context *dctx, struct dd_call *call) { struct dd_screen *dscreen = dd_screen(dctx->base.screen); + struct pipe_context *pipe = dctx->pipe; if (dctx->num_draw_calls >= dscreen->skip_count) { switch (dscreen->mode) { @@ -615,6 +617,8 @@ dd_after_draw(struct dd_context *dctx, struct dd_call *call) } break; case DD_DUMP_ALL_CALLS: + if (!dscreen->no_flush) + pipe->flush(pipe, NULL, 0); dd_dump_call(dctx, call, 0); break; default: diff --git a/src/gallium/drivers/ddebug/dd_pipe.h b/src/gallium/drivers/ddebug/dd_pipe.h index a045518dc16..80098dcb644 100644 --- a/src/gallium/drivers/ddebug/dd_pipe.h +++ b/src/gallium/drivers/ddebug/dd_pipe.h @@ -45,6 +45,7 @@ struct dd_screen unsigned timeout_ms; enum dd_mode mode; bool no_flush; + bool verbose; unsigned skip_count; }; diff --git a/src/gallium/drivers/ddebug/dd_screen.c b/src/gallium/drivers/ddebug/dd_screen.c index 2716845f58f..3706b2d63f5 100644 --- a/src/gallium/drivers/ddebug/dd_screen.c +++ b/src/gallium/drivers/ddebug/dd_screen.c @@ -270,7 +270,7 @@ ddebug_screen_create(struct pipe_screen *screen) { struct dd_screen *dscreen; const char *option = debug_get_option("GALLIUM_DDEBUG", NULL); - bool dump_always = option && !strcmp(option, "always"); + bool dump_always = option && !strncmp(option, "always", 6); bool no_flush = option && strstr(option, "noflush"); bool help = option && !strcmp(option, "help"); unsigned timeout = 0; @@ -280,15 +280,18 @@ 
ddebug_screen_create(struct pipe_screen *screen) puts(""); puts("Usage:"); puts(""); - puts(" GALLIUM_DDEBUG=always"); - puts(" Dump context and driver information after every draw call into"); + puts(" GALLIUM_DDEBUG=\"always [noflush] [verbose]\""); + puts(" Flush and dump context and driver information after every draw call into"); puts(" $HOME/"DD_DIR"/."); puts(""); - puts(" GALLIUM_DDEBUG=[timeout in ms] noflush"); + puts(" GALLIUM_DDEBUG=\"[timeout in ms] [noflush] [verbose]\""); puts(" Flush and detect a device hang after every draw call based on the given"); puts(" fence timeout and dump context and driver information into"); puts(" $HOME/"DD_DIR"/ when a hang is detected."); - puts(" If 'noflush' is specified, only detect hangs in pipe->flush."); + puts(""); + puts(" If 'noflush' is specified, do not flush on every draw call. In hang"); + puts(" detection mode, this only detect hangs in pipe->flush."); + puts(" If 'verbose' is specified, additional information is written to stderr."); puts(""); puts(" GALLIUM_DDEBUG_SKIP=[count]"); puts(" Skip flush and hang detection for the given initial number of draw calls."); @@ -339,6 +342,7 @@ ddebug_screen_create(struct pipe_screen *screen) dscreen->timeout_ms = timeout; dscreen->mode = dump_always ? 
DD_DUMP_ALL_CALLS : DD_DETECT_HANGS; dscreen->no_flush = no_flush; + dscreen->verbose = strstr(option, "verbose") != NULL; switch (dscreen->mode) { case DD_DUMP_ALL_CALLS: diff --git a/src/gallium/drivers/ddebug/dd_util.h b/src/gallium/drivers/ddebug/dd_util.h index c217c8eed68..093bdff4a92 100644 --- a/src/gallium/drivers/ddebug/dd_util.h +++ b/src/gallium/drivers/ddebug/dd_util.h @@ -40,7 +40,7 @@ #define DD_DIR "ddebug_dumps" static inline FILE * -dd_get_debug_file() +dd_get_debug_file(bool verbose) { static unsigned index; char proc_name[128], dir[256], name[512]; @@ -65,6 +65,9 @@ dd_get_debug_file() return NULL; } + if (verbose) + fprintf(stderr, "dd: dumping to file %s\n", name); + return f; } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index e926f56023f..4c066c14cd8 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -705,7 +705,7 @@ static int r600_get_compute_param(struct pipe_screen *screen, case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: if (ret) { uint32_t *max_compute_units = ret; - *max_compute_units = rscreen->info.max_compute_units; + *max_compute_units = rscreen->info.num_good_compute_units; } return sizeof(uint32_t); @@ -973,7 +973,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, printf("gart_size = %i MB\n", (int)(rscreen->info.gart_size >> 20)); printf("vram_size = %i MB\n", (int)(rscreen->info.vram_size >> 20)); printf("max_sclk = %i\n", rscreen->info.max_sclk); - printf("max_compute_units = %i\n", rscreen->info.max_compute_units); + printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units); printf("max_se = %i\n", rscreen->info.max_se); printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se); printf("drm = %i.%i.%i\n", rscreen->info.drm_major, diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 27f6e983eea..d66e74f9254 
100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -236,6 +236,7 @@ struct r600_surface { /* Misc. color flags. */ bool alphatest_bypass; bool export_16bpc; + bool color_is_int8; /* Color registers. */ unsigned cb_color_info; @@ -252,6 +253,10 @@ struct r600_surface { unsigned cb_color_fmask_slice; /* EG and later */ unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */ unsigned cb_color_mask; /* R600 only */ + unsigned spi_shader_col_format; /* SI+, no blending, no alpha-to-coverage. */ + unsigned spi_shader_col_format_alpha; /* SI+, alpha-to-coverage */ + unsigned spi_shader_col_format_blend; /* SI+, blending without alpha. */ + unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */ unsigned sx_ps_downconvert; /* Stoney only */ unsigned sx_blend_opt_epsilon; /* Stoney only */ struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */ diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index c94f1093ab7..76be37625f3 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -1511,12 +1511,14 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi; bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit; bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev"; + bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = + HAVE_LLVM >= 0x0308 ? 
"llvm.bitreverse.i32" : "llvm.AMDGPU.brev"; bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit; bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32"; bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp."; + bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = + HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." : "llvm.AMDIL.clamp."; bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp; bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit; bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem; @@ -1539,7 +1541,8 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit; bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp."; + bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = + HAVE_LLVM >= 0x0308 ? 
"llvm.exp2.f32" : "llvm.AMDIL.exp."; bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32"; bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem; diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index ad304747eab..2e5caa67d10 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -251,7 +251,7 @@ struct radeon_info { uint64_t gart_size; uint64_t vram_size; uint32_t max_sclk; - uint32_t max_compute_units; + uint32_t num_good_compute_units; uint32_t max_se; uint32_t max_sh_per_se; diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 75a9d56d110..a93887ec271 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -680,6 +680,14 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx, enum pipe_format format = int_to_norm_format(info->dst.format); unsigned sample_mask = ~0; + /* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and + * the format is R16G16. Use R16A16, which does work. + */ + if (format == PIPE_FORMAT_R16G16_UNORM) + format = PIPE_FORMAT_R16A16_UNORM; + if (format == PIPE_FORMAT_R16G16_SNORM) + format = PIPE_FORMAT_R16A16_SNORM; + if (info->src.resource->nr_samples > 1 && info->dst.resource->nr_samples <= 1 && util_max_layer(info->src.resource, 0) == 0 && diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 5a08cbfb198..6ef6eeec178 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -61,7 +61,7 @@ static void init_scratch_buffer(struct si_context *sctx, struct si_compute *prog /* Compute the scratch buffer size using the maximum number of waves. * This way we don't need to recompute it for each kernel launch. 
*/ - unsigned scratch_waves = 32 * sctx->screen->b.info.max_compute_units; + unsigned scratch_waves = 32 * sctx->screen->b.info.num_good_compute_units; for (i = 0; i < program->shader.binary.global_symbol_count; i++) { unsigned offset = program->shader.binary.global_symbol_offsets[i]; @@ -402,7 +402,7 @@ static void si_launch_grid( num_waves_for_scratch = MIN2(num_waves_for_scratch, - 32 * sctx->screen->b.info.max_compute_units); + 32 * sctx->screen->b.info.num_good_compute_units); si_pm4_set_reg(pm4, R_00B860_COMPUTE_TMPRING_SIZE, /* The maximum value for WAVES is 32 * num CU. * If you program this value incorrectly, the GPU will hang if diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index a07b1c56579..e16ebbdef3e 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -771,7 +771,7 @@ void si_check_vm_faults(struct si_context *sctx) if (!si_vm_fault_occured(sctx, &addr)) return; - f = dd_get_debug_file(); + f = dd_get_debug_file(false); if (!f) return; diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index d157a9ffb00..6c796731a18 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -138,6 +138,22 @@ static void si_release_sampler_views(struct si_sampler_views *views) si_release_descriptors(&views->desc); } +static void si_sampler_view_add_buffers(struct si_context *sctx, + struct si_sampler_view *rview) +{ + if (rview->resource) { + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, + rview->resource, RADEON_USAGE_READ, + r600_get_sampler_view_priority(rview->resource)); + } + + if (rview->dcc_buffer && rview->dcc_buffer != rview->resource) { + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, + rview->dcc_buffer, RADEON_USAGE_READ, + RADEON_PRIO_DCC); + } +} + static void si_sampler_views_begin_new_cs(struct si_context *sctx, struct si_sampler_views 
*views) { @@ -149,12 +165,7 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx, struct si_sampler_view *rview = (struct si_sampler_view*)views->views[i]; - if (!rview->resource) - continue; - - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, - rview->resource, RADEON_USAGE_READ, - r600_get_sampler_view_priority(rview->resource)); + si_sampler_view_add_buffers(sctx, rview); } if (!views->desc.buffer) @@ -176,15 +187,7 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader, struct si_sampler_view *rview = (struct si_sampler_view*)view; - if (rview->resource) - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, - rview->resource, RADEON_USAGE_READ, - r600_get_sampler_view_priority(rview->resource)); - - if (rview->dcc_buffer && rview->dcc_buffer != rview->resource) - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, - rview->dcc_buffer, RADEON_USAGE_READ, - RADEON_PRIO_DCC); + si_sampler_view_add_buffers(sctx, rview); pipe_sampler_view_reference(&views->views[slot], view); memcpy(views->desc.list + slot*8, view_desc, 8*4); @@ -978,9 +981,11 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom) si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, vs_base, true); si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, vs_base, true); - /* The TESSEVAL shader needs this for the ESGS ring buffer. */ - si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, - R_00B330_SPI_SHADER_USER_DATA_ES_0, true); + if (sctx->tes_shader.cso) { + /* The TESSEVAL shader needs this for the ESGS ring buffer. */ + si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, + R_00B330_SPI_SHADER_USER_DATA_ES_0, true); + } } else if (sctx->tes_shader.cso) { /* The TESSEVAL shader needs this for streamout. 
*/ si_emit_shader_pointer(sctx, &sctx->rw_buffers[PIPE_SHADER_VERTEX].desc, diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 3e20c3b81fa..0c1ae90f9da 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -208,7 +208,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, * this for non-cs shaders. Using the wrong value here can result in * GPU lockups, but the maximum value seems to always work. */ - sctx->scratch_waves = 32 * sscreen->b.info.max_compute_units; + sctx->scratch_waves = 32 * sscreen->b.info.num_good_compute_units; #if HAVE_LLVM >= 0x0306 /* Initialize LLVM TargetMachine */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index f83cb024f0e..e2725fe3679 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -125,7 +125,11 @@ struct si_framebuffer { unsigned log_samples; unsigned cb0_is_integer; unsigned compressed_cb_mask; - unsigned export_16bpc; + unsigned spi_shader_col_format; + unsigned spi_shader_col_format_alpha; + unsigned spi_shader_col_format_blend; + unsigned spi_shader_col_format_blend_alpha; + unsigned color_is_int8; /* bitmask */ unsigned dirty_cbufs; bool dirty_zsbuf; }; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 2de7def8dd2..94c1129c88d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -68,6 +68,7 @@ struct si_shader_context struct si_shader *shader; struct si_screen *screen; unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. 
*/ + bool is_gs_copy_shader; int param_streamout_config; int param_streamout_write_index; int param_streamout_offset[4]; @@ -1119,9 +1120,20 @@ static void declare_system_value( value = get_sample_id(radeon_bld); break; - case TGSI_SEMANTIC_SAMPLEPOS: - value = load_sample_position(radeon_bld, get_sample_id(radeon_bld)); + case TGSI_SEMANTIC_SAMPLEPOS: { + LLVMValueRef pos[4] = { + LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_X_FLOAT), + LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_Y_FLOAT), + lp_build_const_float(gallivm, 0), + lp_build_const_float(gallivm, 0) + }; + pos[0] = lp_build_emit_llvm_unary(&radeon_bld->soa.bld_base, + TGSI_OPCODE_FRC, pos[0]); + pos[1] = lp_build_emit_llvm_unary(&radeon_bld->soa.bld_base, + TGSI_OPCODE_FRC, pos[1]); + value = lp_build_gather_values(gallivm, pos, 4); break; + } case TGSI_SEMANTIC_SAMPLEMASK: /* Smoothing isn't MSAA in GL, but it's MSAA in hardware. @@ -1255,6 +1267,28 @@ static LLVMValueRef fetch_constant( return result; } +/* Upper 16 bits must be zero. */ +static LLVMValueRef si_llvm_pack_two_int16(struct gallivm_state *gallivm, + LLVMValueRef val[2]) +{ + return LLVMBuildOr(gallivm->builder, val[0], + LLVMBuildShl(gallivm->builder, val[1], + lp_build_const_int32(gallivm, 16), + ""), ""); +} + +/* Upper 16 bits are ignored and will be dropped. 
*/ +static LLVMValueRef si_llvm_pack_two_int32_as_int16(struct gallivm_state *gallivm, + LLVMValueRef val[2]) +{ + LLVMValueRef v[2] = { + LLVMBuildAnd(gallivm->builder, val[0], + lp_build_const_int32(gallivm, 0xffff), ""), + val[1], + }; + return si_llvm_pack_two_int16(gallivm, v); +} + /* Initialize arguments for the shader export intrinsic */ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, LLVMValueRef *values, @@ -1265,16 +1299,15 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; struct lp_build_context *base = &bld_base->base; - unsigned compressed = 0; + struct gallivm_state *gallivm = base->gallivm; + LLVMBuilderRef builder = base->gallivm->builder; + LLVMValueRef val[4]; + unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR; unsigned chan; + bool is_int8; - /* XXX: This controls which components of the output - * registers actually get exported. (e.g bit 0 means export - * X component, bit 1 means export Y component, etc.) I'm - * hard coding this to 0xf for now. In the future, we might - * want to do something else. - */ - args[0] = lp_build_const_int32(base->gallivm, 0xf); + /* Default is 0xf. Adjusted below depending on the format. 
*/ + args[0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */ /* Specify whether the EXEC mask represents the valid mask */ args[1] = uint->zero; @@ -1286,17 +1319,47 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, args[3] = lp_build_const_int32(base->gallivm, target); if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) { + const union si_shader_key *key = &si_shader_ctx->shader->key; + unsigned col_formats = key->ps.spi_shader_col_format; int cbuf = target - V_008DFC_SQ_EXP_MRT; - if (cbuf >= 0 && cbuf < 8) - compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1; + assert(cbuf >= 0 && cbuf < 8); + spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf; + is_int8 = (key->ps.color_is_int8 >> cbuf) & 0x1; } - /* Set COMPR flag */ - args[4] = compressed ? uint->one : uint->zero; + args[4] = uint->zero; /* COMPR flag */ + args[5] = base->undef; + args[6] = base->undef; + args[7] = base->undef; + args[8] = base->undef; + + switch (spi_shader_col_format) { + case V_028714_SPI_SHADER_ZERO: + args[0] = uint->zero; /* writemask */ + args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL); + break; + + case V_028714_SPI_SHADER_32_R: + args[0] = uint->one; /* writemask */ + args[5] = values[0]; + break; + + case V_028714_SPI_SHADER_32_GR: + args[0] = lp_build_const_int32(base->gallivm, 0x3); /* writemask */ + args[5] = values[0]; + args[6] = values[1]; + break; + + case V_028714_SPI_SHADER_32_AR: + args[0] = lp_build_const_int32(base->gallivm, 0x9); /* writemask */ + args[5] = values[0]; + args[8] = values[3]; + break; + + case V_028714_SPI_SHADER_FP16_ABGR: + args[4] = uint->one; /* COMPR flag */ - if (compressed) { - /* Pixel shader needs to pack output values before export */ for (chan = 0; chan < 2; chan++) { LLVMValueRef pack_args[2] = { values[2 * chan], @@ -1306,18 +1369,107 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, packed = 
lp_build_intrinsic(base->gallivm->builder, "llvm.SI.packf16", - LLVMInt32TypeInContext(base->gallivm->context), - pack_args, 2, + uint->elem_type, pack_args, 2, LLVMReadNoneAttribute | LLVMNoUnwindAttribute); args[chan + 5] = LLVMBuildBitCast(base->gallivm->builder, - packed, - LLVMFloatTypeInContext(base->gallivm->context), - ""); - args[chan + 7] = base->undef; + packed, base->elem_type, ""); } - } else + break; + + case V_028714_SPI_SHADER_UNORM16_ABGR: + for (chan = 0; chan < 4; chan++) { + val[chan] = radeon_llvm_saturate(bld_base, values[chan]); + val[chan] = LLVMBuildFMul(builder, val[chan], + lp_build_const_float(gallivm, 65535), ""); + val[chan] = LLVMBuildFAdd(builder, val[chan], + lp_build_const_float(gallivm, 0.5), ""); + val[chan] = LLVMBuildFPToUI(builder, val[chan], + uint->elem_type, ""); + } + + args[4] = uint->one; /* COMPR flag */ + args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT, + si_llvm_pack_two_int16(gallivm, val)); + args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT, + si_llvm_pack_two_int16(gallivm, val+2)); + break; + + case V_028714_SPI_SHADER_SNORM16_ABGR: + for (chan = 0; chan < 4; chan++) { + /* Clamp between [-1, 1]. */ + val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MIN, + values[chan], + lp_build_const_float(gallivm, 1)); + val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX, + val[chan], + lp_build_const_float(gallivm, -1)); + /* Convert to a signed integer in [-32767, 32767]. */ + val[chan] = LLVMBuildFMul(builder, val[chan], + lp_build_const_float(gallivm, 32767), ""); + /* If positive, add 0.5, else add -0.5. 
*/ + val[chan] = LLVMBuildFAdd(builder, val[chan], + LLVMBuildSelect(builder, + LLVMBuildFCmp(builder, LLVMRealOGE, + val[chan], base->zero, ""), + lp_build_const_float(gallivm, 0.5), + lp_build_const_float(gallivm, -0.5), ""), ""); + val[chan] = LLVMBuildFPToSI(builder, val[chan], uint->elem_type, ""); + } + + args[4] = uint->one; /* COMPR flag */ + args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT, + si_llvm_pack_two_int32_as_int16(gallivm, val)); + args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT, + si_llvm_pack_two_int32_as_int16(gallivm, val+2)); + break; + + case V_028714_SPI_SHADER_UINT16_ABGR: { + LLVMValueRef max = lp_build_const_int32(gallivm, is_int8 ? + 255 : 65535); + /* Clamp. */ + for (chan = 0; chan < 4; chan++) { + val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, values[chan]); + val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_UMIN, + val[chan], max); + } + + args[4] = uint->one; /* COMPR flag */ + args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT, + si_llvm_pack_two_int16(gallivm, val)); + args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT, + si_llvm_pack_two_int16(gallivm, val+2)); + break; + } + + case V_028714_SPI_SHADER_SINT16_ABGR: { + LLVMValueRef max = lp_build_const_int32(gallivm, is_int8 ? + 127 : 32767); + LLVMValueRef min = lp_build_const_int32(gallivm, is_int8 ? + -128 : -32768); + /* Clamp. 
*/ + for (chan = 0; chan < 4; chan++) { + val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, values[chan]); + val[chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_IMIN, + val[chan], max); + val[chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_IMAX, + val[chan], min); + } + + args[4] = uint->one; /* COMPR flag */ + args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT, + si_llvm_pack_two_int32_as_int16(gallivm, val)); + args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT, + si_llvm_pack_two_int32_as_int16(gallivm, val+2)); + break; + } + + case V_028714_SPI_SHADER_32_ABGR: memcpy(&args[5], values, sizeof(values[0]) * 4); + break; + } } static void si_alpha_test(struct lp_build_tgsi_context *bld_base, @@ -2000,6 +2152,8 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context * bld_base) struct si_shader_output_values *outputs = NULL; int i,j; + assert(!si_shader_ctx->is_gs_copy_shader); + outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0])); /* Vertex color clamping. @@ -2008,8 +2162,7 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context * bld_base) * an IF statement is added that clamps all colors if the constant * is true. */ - if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX && - !si_shader_ctx->shader->is_gs_copy_shader) { + if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { struct lp_build_if_state if_ctx; LLVMValueRef cond = NULL; LLVMValueRef addr, val; @@ -3312,7 +3465,9 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, { struct gallivm_state *gallivm = bld_base->base.gallivm; - lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.barrier.local", + lp_build_intrinsic(gallivm->builder, + HAVE_LLVM >= 0x0309 ? 
"llvm.amdgcn.s.barrier" + : "llvm.AMDGPU.barrier.local", LLVMVoidTypeInContext(gallivm->context), NULL, 0, LLVMNoUnwindAttribute); } @@ -3403,7 +3558,7 @@ static void create_function(struct si_shader_context *si_shader_ctx) params[SI_PARAM_LS_OUT_LAYOUT] = i32; num_params = SI_PARAM_LS_OUT_LAYOUT+1; } else { - if (shader->is_gs_copy_shader) { + if (si_shader_ctx->is_gs_copy_shader) { last_array_pointer = SI_PARAM_CONST_BUFFERS; num_params = SI_PARAM_CONST_BUFFERS+1; } else { @@ -3676,7 +3831,7 @@ static void preload_ring_buffers(struct si_shader_context *si_shader_ctx) build_indexed_load_const(si_shader_ctx, buf_ptr, offset); } - if (si_shader_ctx->shader->is_gs_copy_shader) { + if (si_shader_ctx->is_gs_copy_shader) { LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS); si_shader_ctx->gsvs_ring[0] = @@ -3850,22 +4005,65 @@ static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary static void si_shader_dump_stats(struct si_screen *sscreen, struct si_shader_config *conf, + unsigned num_inputs, unsigned code_size, struct pipe_debug_callback *debug, unsigned processor) { + unsigned lds_increment = sscreen->b.chip_class >= CIK ? 512 : 256; + unsigned lds_per_wave = 0; + unsigned max_simd_waves = 10; + + /* Compute LDS usage for PS. */ + if (processor == TGSI_PROCESSOR_FRAGMENT) { + /* The minimum usage per wave is (num_inputs * 36). The maximum + * usage is (num_inputs * 36 * 16). + * We can get anything in between and it varies between waves. + * + * Other stages don't know the size at compile time or don't + * allocate LDS per wave, but instead they do it per thread group. + */ + lds_per_wave = conf->lds_size * lds_increment + + align(num_inputs * 36, lds_increment); + } + + /* Compute the per-SIMD wave counts. 
*/ + if (conf->num_sgprs) { + if (sscreen->b.chip_class >= VI) + max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs); + else + max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs); + } + + if (conf->num_vgprs) + max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs); + + /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD + * that PS can use. + */ + if (lds_per_wave) + max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave); + if (r600_can_dump_shader(&sscreen->b, processor)) { fprintf(stderr, "*** SHADER STATS ***\n" - "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n" - "Scratch: %d bytes per wave\n********************\n", + "SGPRS: %d\n" + "VGPRS: %d\n" + "Code Size: %d bytes\n" + "LDS: %d blocks\n" + "Scratch: %d bytes per wave\n" + "Max Waves: %d\n" + "********************\n", conf->num_sgprs, conf->num_vgprs, code_size, - conf->lds_size, conf->scratch_bytes_per_wave); + conf->lds_size, conf->scratch_bytes_per_wave, + max_simd_waves); } pipe_debug_message(debug, SHADER_INFO, - "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d LDS: %d Scratch: %d", + "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d " + "LDS: %d Scratch: %d Max Waves: %d", conf->num_sgprs, conf->num_vgprs, code_size, - conf->lds_size, conf->scratch_bytes_per_wave); + conf->lds_size, conf->scratch_bytes_per_wave, + max_simd_waves); } void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, @@ -3876,6 +4074,7 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, si_shader_dump_disassembly(&shader->binary, debug); si_shader_dump_stats(sscreen, &shader->config, + shader->selector->info.num_inputs, shader->binary.code_size, debug, processor); } @@ -3924,7 +4123,6 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; struct lp_build_context *base = &bld_base->base; struct lp_build_context *uint = &bld_base->uint_bld; - 
struct si_shader *shader = si_shader_ctx->shader; struct si_shader_output_values *outputs; struct tgsi_shader_info *gsinfo = &gs->selector->info; LLVMValueRef args[9]; @@ -3933,7 +4131,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0])); si_shader_ctx->type = TGSI_PROCESSOR_VERTEX; - shader->is_gs_copy_shader = true; + si_shader_ctx->is_gs_copy_shader = true; radeon_llvm_context_init(&si_shader_ctx->radeon_bld); @@ -4031,7 +4229,7 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f) break; case PIPE_SHADER_FRAGMENT: - fprintf(f, " export_16bpc = 0x%X\n", key->ps.export_16bpc); + fprintf(f, " spi_shader_col_format = 0x%x\n", key->ps.spi_shader_col_format); fprintf(f, " last_cbuf = %u\n", key->ps.last_cbuf); fprintf(f, " color_two_side = %u\n", key->ps.color_two_side); fprintf(f, " alpha_func = %u\n", key->ps.alpha_func); @@ -4208,7 +4406,6 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) { shader->gs_copy_shader = CALLOC_STRUCT(si_shader); shader->gs_copy_shader->selector = shader->selector; - shader->gs_copy_shader->key = shader->key; si_shader_ctx.shader = shader->gs_copy_shader; if ((r = si_generate_gs_copy_shader(sscreen, &si_shader_ctx, shader, dump, debug))) { diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 1635358d505..c1512078a18 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -213,6 +213,10 @@ struct si_shader_selector { /* PS parameters. */ unsigned db_shader_control; + /* Set 0xf or 0x0 (4 bits) per each written output. + * ANDed with spi_shader_col_format. 
+ */ + unsigned colors_written_4bit; /* masks of "get_unique_index" bits */ uint64_t outputs_written; @@ -232,7 +236,8 @@ struct si_shader_selector { union si_shader_key { struct { - unsigned export_16bpc:8; + unsigned spi_shader_col_format; + unsigned color_is_int8:8; unsigned last_cbuf:3; unsigned color_two_side:1; unsigned alpha_func:3; @@ -292,7 +297,6 @@ struct si_shader { bool uses_instanceid; unsigned nr_pos_exports; unsigned nr_param_exports; - bool is_gs_copy_shader; bool dx10_clamp_mode; /* convert NaNs to 0 */ }; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 2a6d2c6ff36..9e0ccfc5dde 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -403,6 +403,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, if (!blend) return NULL; + blend->alpha_to_coverage = state->alpha_to_coverage; blend->alpha_to_one = state->alpha_to_one; blend->dual_src_blend = util_blend_state_is_dual(state, 0); @@ -419,6 +420,9 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, S_028B70_ALPHA_TO_MASK_OFFSET2(2) | S_028B70_ALPHA_TO_MASK_OFFSET3(2)); + if (state->alpha_to_coverage) + blend->need_src_alpha_4bit |= 0xf; + blend->cb_target_mask = 0; for (int i = 0; i < 8; i++) { /* state->rt entries > 0 only written if independent blending */ @@ -433,6 +437,9 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, unsigned blend_cntl = 0; + if (!state->rt[j].colormask) + continue; + /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ blend->cb_target_mask |= state->rt[j].colormask << (4 * i); @@ -453,6 +460,17 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); } si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); + + blend->blend_enable_4bit |= 0xf << (i * 4); + + /* This is only important for formats without 
alpha. */ + if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) + blend->need_src_alpha_4bit |= 0xf << (i * 4); } if (blend->cb_target_mask) { @@ -1266,53 +1284,6 @@ static uint32_t si_colorformat_endian_swap(uint32_t colorformat) } } -/* Returns the size in bits of the widest component of a CB format */ -static unsigned si_colorformat_max_comp_size(uint32_t colorformat) -{ - switch(colorformat) { - case V_028C70_COLOR_4_4_4_4: - return 4; - - case V_028C70_COLOR_1_5_5_5: - case V_028C70_COLOR_5_5_5_1: - return 5; - - case V_028C70_COLOR_5_6_5: - return 6; - - case V_028C70_COLOR_8: - case V_028C70_COLOR_8_8: - case V_028C70_COLOR_8_8_8_8: - return 8; - - case V_028C70_COLOR_10_10_10_2: - case V_028C70_COLOR_2_10_10_10: - return 10; - - case V_028C70_COLOR_10_11_11: - case V_028C70_COLOR_11_11_10: - return 11; - - case V_028C70_COLOR_16: - case V_028C70_COLOR_16_16: - case V_028C70_COLOR_16_16_16_16: - return 16; - - case V_028C70_COLOR_8_24: - case V_028C70_COLOR_24_8: - return 24; - - case V_028C70_COLOR_32: - case V_028C70_COLOR_32_32: - case V_028C70_COLOR_32_32_32_32: - case V_028C70_COLOR_X24_8_32_FLOAT: - return 32; - } - - assert(!"Unknown maximum component size"); - return 0; -} - static uint32_t si_translate_dbformat(enum pipe_format format) { switch (format) { @@ -1405,6 +1376,30 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen, } } + if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && + sscreen->b.family >= CHIP_STONEY) { + switch (format) { + case PIPE_FORMAT_ETC1_RGB8: + case PIPE_FORMAT_ETC2_RGB8: + case PIPE_FORMAT_ETC2_SRGB8: + return V_008F14_IMG_DATA_FORMAT_ETC2_RGB; + case PIPE_FORMAT_ETC2_RGB8A1: + case PIPE_FORMAT_ETC2_SRGB8A1: + return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1; + case PIPE_FORMAT_ETC2_RGBA8: + 
case PIPE_FORMAT_ETC2_SRGBA8: + return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA; + case PIPE_FORMAT_ETC2_R11_UNORM: + case PIPE_FORMAT_ETC2_R11_SNORM: + return V_008F14_IMG_DATA_FORMAT_ETC2_R; + case PIPE_FORMAT_ETC2_RG11_UNORM: + case PIPE_FORMAT_ETC2_RG11_SNORM: + return V_008F14_IMG_DATA_FORMAT_ETC2_RG; + default: + goto out_unknown; + } + } + if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { if (!enable_compressed_formats) goto out_unknown; @@ -1880,6 +1875,123 @@ unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool sten * framebuffer handling */ +static void si_choose_spi_color_formats(struct r600_surface *surf, + unsigned format, unsigned swap, + unsigned ntype, bool is_depth) +{ + /* Alpha is needed for alpha-to-coverage. + * Blending may be with or without alpha. + */ + unsigned normal = 0; /* most optimal, may not support blending or export alpha */ + unsigned alpha = 0; /* exports alpha, but may not support blending */ + unsigned blend = 0; /* supports blending, but may not export alpha */ + unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */ + + /* Choose the SPI color formats. These are required values for Stoney/RB+. + * Other chips have multiple choices, though they are not necessarily better. 
+ */ + switch (format) { + case V_028C70_COLOR_5_6_5: + case V_028C70_COLOR_1_5_5_5: + case V_028C70_COLOR_5_5_5_1: + case V_028C70_COLOR_4_4_4_4: + case V_028C70_COLOR_10_11_11: + case V_028C70_COLOR_11_11_10: + case V_028C70_COLOR_8: + case V_028C70_COLOR_8_8: + case V_028C70_COLOR_8_8_8_8: + case V_028C70_COLOR_10_10_10_2: + case V_028C70_COLOR_2_10_10_10: + if (ntype == V_028C70_NUMBER_UINT) + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; + else if (ntype == V_028C70_NUMBER_SINT) + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; + else + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; + break; + + case V_028C70_COLOR_16: + case V_028C70_COLOR_16_16: + case V_028C70_COLOR_16_16_16_16: + if (ntype == V_028C70_NUMBER_UNORM || + ntype == V_028C70_NUMBER_SNORM) { + /* UNORM16 and SNORM16 don't support blending */ + if (ntype == V_028C70_NUMBER_UNORM) + normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR; + else + normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR; + + /* Use 32 bits per channel for blending. 
*/ + if (format == V_028C70_COLOR_16) { + if (swap == V_028C70_SWAP_STD) { /* R */ + blend = V_028714_SPI_SHADER_32_R; + blend_alpha = V_028714_SPI_SHADER_32_AR; + } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ + blend = blend_alpha = V_028714_SPI_SHADER_32_AR; + else + assert(0); + } else if (format == V_028C70_COLOR_16_16) { + if (swap == V_028C70_SWAP_STD) { /* RG */ + blend = V_028714_SPI_SHADER_32_GR; + blend_alpha = V_028714_SPI_SHADER_32_ABGR; + } else if (swap == V_028C70_SWAP_ALT) /* RA */ + blend = blend_alpha = V_028714_SPI_SHADER_32_AR; + else + assert(0); + } else /* 16_16_16_16 */ + blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR; + } else if (ntype == V_028C70_NUMBER_UINT) + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; + else if (ntype == V_028C70_NUMBER_SINT) + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; + else if (ntype == V_028C70_NUMBER_FLOAT) + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; + else + assert(0); + break; + + case V_028C70_COLOR_32: + if (swap == V_028C70_SWAP_STD) { /* R */ + blend = normal = V_028714_SPI_SHADER_32_R; + alpha = blend_alpha = V_028714_SPI_SHADER_32_AR; + } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; + else + assert(0); + break; + + case V_028C70_COLOR_32_32: + if (swap == V_028C70_SWAP_STD) { /* RG */ + blend = normal = V_028714_SPI_SHADER_32_GR; + alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR; + } else if (swap == V_028C70_SWAP_ALT) /* RA */ + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; + else + assert(0); + break; + + case V_028C70_COLOR_32_32_32_32: + case V_028C70_COLOR_8_24: + case V_028C70_COLOR_24_8: + case V_028C70_COLOR_X24_8_32_FLOAT: + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR; + break; + + default: + assert(0); + return; + } + + /* The DB->CB copy needs 32_ABGR. 
*/ + if (is_depth) + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR; + + surf->spi_shader_col_format = normal; + surf->spi_shader_col_format_alpha = alpha; + surf->spi_shader_col_format_blend = blend; + surf->spi_shader_col_format_blend_alpha = blend_alpha; +} + static void si_initialize_color_surface(struct si_context *sctx, struct r600_surface *surf) { @@ -1893,7 +2005,6 @@ static void si_initialize_color_surface(struct si_context *sctx, const struct util_format_description *desc; int i; unsigned blend_clamp = 0, blend_bypass = 0; - unsigned max_comp_size; /* Layered rendering doesn't work with LINEAR_GENERAL. * (LINEAR_ALIGNED and others work) */ @@ -1971,6 +2082,12 @@ static void si_initialize_color_surface(struct si_context *sctx, blend_bypass = 1; } + if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) && + (format == V_028C70_COLOR_8 || + format == V_028C70_COLOR_8_8 || + format == V_028C70_COLOR_8_8_8_8)) + surf->color_is_int8 = true; + color_info = S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) | @@ -2050,13 +2167,7 @@ static void si_initialize_color_surface(struct si_context *sctx, } /* Determine pixel shader export format */ - max_comp_size = si_colorformat_max_comp_size(format); - if (ntype == V_028C70_NUMBER_SRGB || - ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) && - max_comp_size <= 10) || - (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) { - surf->export_16bpc = true; - } + si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth); if (sctx->b.family == CHIP_STONEY && !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) { @@ -2283,7 +2394,12 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, util_copy_framebuffer_state(&sctx->framebuffer.state, state); - sctx->framebuffer.export_16bpc = 0; + sctx->framebuffer.spi_shader_col_format = 0; + sctx->framebuffer.spi_shader_col_format_alpha = 0; + 
sctx->framebuffer.spi_shader_col_format_blend = 0; + sctx->framebuffer.spi_shader_col_format_blend_alpha = 0; + sctx->framebuffer.color_is_int8 = 0; + sctx->framebuffer.compressed_cb_mask = 0; sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); @@ -2304,22 +2420,35 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, si_initialize_color_surface(sctx, surf); } - if (surf->export_16bpc) { - sctx->framebuffer.export_16bpc |= 1 << i; - } + sctx->framebuffer.spi_shader_col_format |= + surf->spi_shader_col_format << (i * 4); + sctx->framebuffer.spi_shader_col_format_alpha |= + surf->spi_shader_col_format_alpha << (i * 4); + sctx->framebuffer.spi_shader_col_format_blend |= + surf->spi_shader_col_format_blend << (i * 4); + sctx->framebuffer.spi_shader_col_format_blend_alpha |= + surf->spi_shader_col_format_blend_alpha << (i * 4); + + if (surf->color_is_int8) + sctx->framebuffer.color_is_int8 |= 1 << i; if (rtex->fmask.size && rtex->cmask.size) { sctx->framebuffer.compressed_cb_mask |= 1 << i; } r600_context_add_resource_size(ctx, surf->base.texture); } - /* Set the 16BPC export for possible dual-src blending. */ - if (i == 1 && surf && surf->export_16bpc) { - sctx->framebuffer.export_16bpc |= 1 << 1; + /* Set the second SPI format for possible dual-src blending. 
*/ + if (i == 1 && surf) { + sctx->framebuffer.spi_shader_col_format |= + surf->spi_shader_col_format << (i * 4); + sctx->framebuffer.spi_shader_col_format_alpha |= + surf->spi_shader_col_format_alpha << (i * 4); + sctx->framebuffer.spi_shader_col_format_blend |= + surf->spi_shader_col_format_blend << (i * 4); + sctx->framebuffer.spi_shader_col_format_blend_alpha |= + surf->spi_shader_col_format_blend_alpha << (i * 4); } - assert(!(sctx->framebuffer.export_16bpc & ~0xff)); - if (state->zsbuf) { surf = (struct r600_surface*)state->zsbuf; @@ -2703,12 +2832,17 @@ si_create_sampler_view_custom(struct pipe_context *ctx, case PIPE_FORMAT_DXT3_SRGBA: case PIPE_FORMAT_DXT5_SRGBA: case PIPE_FORMAT_BPTC_SRGBA: + case PIPE_FORMAT_ETC2_SRGB8: + case PIPE_FORMAT_ETC2_SRGB8A1: + case PIPE_FORMAT_ETC2_SRGBA8: num_format = V_008F14_IMG_NUM_FORMAT_SRGB; break; case PIPE_FORMAT_RGTC1_SNORM: case PIPE_FORMAT_LATC1_SNORM: case PIPE_FORMAT_RGTC2_SNORM: case PIPE_FORMAT_LATC2_SNORM: + case PIPE_FORMAT_ETC2_R11_SNORM: + case PIPE_FORMAT_ETC2_RG11_SNORM: /* implies float, so use SNORM/UNORM to determine whether data is signed or not */ case PIPE_FORMAT_BPTC_RGB_FLOAT: @@ -3596,12 +3730,32 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); if (sctx->b.chip_class >= CIK) { - si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffc)); si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0); - si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xfffe)); + si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff)); si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff)); - si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); - si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0)); + + if (sscreen->b.info.num_good_compute_units / + (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) { + /* Too few available 
compute units per SH. Disallowing + * VS to run on CU0 could hurt us more than late VS + * allocation would help. + * + * LATE_ALLOC_VS = 2 is the highest safe number. + */ + si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff)); + si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); + si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2)); + } else { + /* Set LATE_ALLOC_VS == 31. It should be less than + * the number of scratch waves. Limitations: + * - VS can't execute on CU0. + * - If HS writes outputs to LDS, LS can't execute on CU0. + */ + si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe)); + si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe)); + si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31)); + } + si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index f5ca661f8d7..be3488e6dba 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -39,8 +39,14 @@ struct si_shader; struct si_state_blend { struct si_pm4_state pm4; uint32_t cb_target_mask; + bool alpha_to_coverage; bool alpha_to_one; bool dual_src_blend; + /* Set 0xf or 0x0 (4 bits) per render target if the following is + * true. ANDed with spi_shader_col_format. 
+ */ + unsigned blend_enable_4bit; + unsigned need_src_alpha_4bit; }; struct si_state_rasterizer { diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 8ff70b44d45..36174eb5a94 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -212,13 +212,37 @@ static void si_shader_es(struct si_shader *shader) si_set_tesseval_regs(shader, pm4); } +/** + * Calculate the appropriate setting of VGT_GS_MODE when \p shader is a + * geometry shader. + */ +static uint32_t si_vgt_gs_mode(struct si_shader *shader) +{ + unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices; + unsigned cut_mode; + + if (gs_max_vert_out <= 128) { + cut_mode = V_028A40_GS_CUT_128; + } else if (gs_max_vert_out <= 256) { + cut_mode = V_028A40_GS_CUT_256; + } else if (gs_max_vert_out <= 512) { + cut_mode = V_028A40_GS_CUT_512; + } else { + assert(gs_max_vert_out <= 1024); + cut_mode = V_028A40_GS_CUT_1024; + } + + return S_028A40_MODE(V_028A40_GS_SCENARIO_G) | + S_028A40_CUT_MODE(cut_mode)| + S_028A40_ES_WRITE_OPTIMIZE(1) | + S_028A40_GS_WRITE_OPTIMIZE(1); +} + static void si_shader_gs(struct si_shader *shader) { unsigned gs_vert_itemsize = shader->selector->gsvs_vertex_size; - unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices; unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2; unsigned gs_num_invocations = shader->selector->gs_num_invocations; - unsigned cut_mode; struct si_pm4_state *pm4; unsigned num_sgprs, num_user_sgprs; uint64_t va; @@ -232,22 +256,7 @@ static void si_shader_gs(struct si_shader *shader) if (!pm4) return; - if (gs_max_vert_out <= 128) { - cut_mode = V_028A40_GS_CUT_128; - } else if (gs_max_vert_out <= 256) { - cut_mode = V_028A40_GS_CUT_256; - } else if (gs_max_vert_out <= 512) { - cut_mode = V_028A40_GS_CUT_512; - } else { - assert(gs_max_vert_out <= 1024); - cut_mode = V_028A40_GS_CUT_1024; - } - - 
si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, - S_028A40_MODE(V_028A40_GS_SCENARIO_G) | - S_028A40_CUT_MODE(cut_mode)| - S_028A40_ES_WRITE_OPTIMIZE(1) | - S_028A40_GS_WRITE_OPTIMIZE(1)); + si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, si_vgt_gs_mode(shader)); si_pm4_set_reg(pm4, R_028A60_VGT_GSVS_RING_OFFSET_1, gsvs_itemsize); si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize * ((max_stream >= 2) ? 2 : 1)); @@ -255,7 +264,7 @@ static void si_shader_gs(struct si_shader *shader) si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize * (max_stream + 1)); - si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out); + si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, shader->selector->gs_max_out_vertices); si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize >> 2); si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, (max_stream >= 1) ? gs_vert_itemsize >> 2 : 0); @@ -289,7 +298,14 @@ static void si_shader_gs(struct si_shader *shader) S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); } -static void si_shader_vs(struct si_shader *shader) +/** + * Compute the state for \p shader, which will run as a vertex shader on the + * hardware. + * + * If \p gs is non-NULL, it points to the geometry shader for which this shader + * is the copy shader. + */ +static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) { struct si_pm4_state *pm4; unsigned num_sgprs, num_user_sgprs; @@ -304,20 +320,26 @@ static void si_shader_vs(struct si_shader *shader) if (!pm4) return; - /* If this is the GS copy shader, the GS state writes this register. - * Otherwise, the VS state writes it. + /* We always write VGT_GS_MODE in the VS state, because every switch + * between different shader pipelines involving a different GS or no + * GS at all involves a switch of the VS (different GS use different + * copy shaders). 
On the other hand, when the API switches from a GS to + * no GS and then back to the same GS used originally, the GS state is + * not sent again. */ - if (!shader->is_gs_copy_shader) { + if (!gs) { si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, S_028A40_MODE(enable_prim_id ? V_028A40_GS_SCENARIO_A : 0)); si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, enable_prim_id); - } else + } else { + si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, si_vgt_gs_mode(gs)); si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0); + } va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); - if (shader->is_gs_copy_shader) { + if (gs) { vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */ num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR; } else if (shader->selector->type == PIPE_SHADER_VERTEX) { @@ -382,13 +404,58 @@ static void si_shader_vs(struct si_shader *shader) si_set_tesseval_regs(shader, pm4); } +static unsigned si_get_spi_shader_col_format(struct si_shader *shader) +{ + unsigned value = shader->key.ps.spi_shader_col_format; + unsigned i, num_targets = (util_last_bit(value) + 3) / 4; + + /* If the i-th target format is set, all previous target formats must + * be non-zero to avoid hangs. 
+ */ + for (i = 0; i < num_targets; i++) + if (!(value & (0xf << (i * 4)))) + value |= V_028714_SPI_SHADER_32_R << (i * 4); + + return value; +} + +static unsigned si_get_cb_shader_mask(unsigned spi_shader_col_format) +{ + unsigned i, cb_shader_mask = 0; + + for (i = 0; i < 8; i++) { + switch ((spi_shader_col_format >> (i * 4)) & 0xf) { + case V_028714_SPI_SHADER_ZERO: + break; + case V_028714_SPI_SHADER_32_R: + cb_shader_mask |= 0x1 << (i * 4); + break; + case V_028714_SPI_SHADER_32_GR: + cb_shader_mask |= 0x3 << (i * 4); + break; + case V_028714_SPI_SHADER_32_AR: + cb_shader_mask |= 0x9 << (i * 4); + break; + case V_028714_SPI_SHADER_FP16_ABGR: + case V_028714_SPI_SHADER_UNORM16_ABGR: + case V_028714_SPI_SHADER_SNORM16_ABGR: + case V_028714_SPI_SHADER_UINT16_ABGR: + case V_028714_SPI_SHADER_SINT16_ABGR: + case V_028714_SPI_SHADER_32_ABGR: + cb_shader_mask |= 0xf << (i * 4); + break; + default: + assert(0); + } + } + return cb_shader_mask; +} + static void si_shader_ps(struct si_shader *shader) { struct tgsi_shader_info *info = &shader->selector->info; struct si_pm4_state *pm4; - unsigned i, spi_ps_in_control; - unsigned spi_shader_col_format = 0, cb_shader_mask = 0; - unsigned colors_written, export_16bpc; + unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask; unsigned num_sgprs, num_user_sgprs; unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); uint64_t va; @@ -423,23 +490,18 @@ static void si_shader_ps(struct si_shader *shader) TGSI_FS_COORD_PIXEL_CENTER_INTEGER) spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1); - /* Find out what SPI_SHADER_COL_FORMAT and CB_SHADER_MASK should be. 
*/ - colors_written = info->colors_written; - export_16bpc = shader->key.ps.export_16bpc; + spi_shader_col_format = si_get_spi_shader_col_format(shader); + cb_shader_mask = si_get_cb_shader_mask(spi_shader_col_format); - if (info->colors_written == 0x1 && - info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) { - colors_written |= (1 << (shader->key.ps.last_cbuf + 1)) - 1; - } - - while (colors_written) { - i = u_bit_scan(&colors_written); - if (export_16bpc & (1 << i)) - spi_shader_col_format |= V_028714_SPI_SHADER_FP16_ABGR << (4 * i); - else - spi_shader_col_format |= V_028714_SPI_SHADER_32_ABGR << (4 * i); - cb_shader_mask |= 0xf << (4 * i); - } + /* This must be non-zero for alpha-test/kill to work. + * The hardware ignores the EXEC mask if no export memory is allocated. + * Don't add this to CB_SHADER_MASK. + */ + if (!spi_shader_col_format && + !info->writes_z && !info->writes_stencil && !info->writes_samplemask && + (shader->selector->info.uses_kill || + shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)) + spi_shader_col_format = V_028714_SPI_SHADER_32_R; /* Set interpolation controls. 
*/ has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) || @@ -498,7 +560,7 @@ static void si_shader_init_pm4_state(struct si_shader *shader) else if (shader->key.vs.as_es) si_shader_es(shader); else - si_shader_vs(shader); + si_shader_vs(shader, NULL); break; case PIPE_SHADER_TESS_CTRL: si_shader_hs(shader); @@ -507,11 +569,11 @@ static void si_shader_init_pm4_state(struct si_shader *shader) if (shader->key.tes.as_es) si_shader_es(shader); else - si_shader_vs(shader); + si_shader_vs(shader, NULL); break; case PIPE_SHADER_GEOMETRY: si_shader_gs(shader); - si_shader_vs(shader->gs_copy_shader); + si_shader_vs(shader->gs_copy_shader, shader); break; case PIPE_SHADER_FRAGMENT: si_shader_ps(shader); @@ -571,12 +633,47 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, break; case PIPE_SHADER_FRAGMENT: { struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; + struct si_state_blend *blend = sctx->queued.named.blend; if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && sel->info.colors_written == 0x1) key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1; - key->ps.export_16bpc = sctx->framebuffer.export_16bpc; + if (blend) { + /* Select the shader color format based on whether + * blending or alpha are needed. + */ + key->ps.spi_shader_col_format = + (blend->blend_enable_4bit & blend->need_src_alpha_4bit & + sctx->framebuffer.spi_shader_col_format_blend_alpha) | + (blend->blend_enable_4bit & ~blend->need_src_alpha_4bit & + sctx->framebuffer.spi_shader_col_format_blend) | + (~blend->blend_enable_4bit & blend->need_src_alpha_4bit & + sctx->framebuffer.spi_shader_col_format_alpha) | + (~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit & + sctx->framebuffer.spi_shader_col_format); + } else + key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format; + + /* If alpha-to-coverage is enabled, we have to export alpha + * even if there is no color buffer. 
+ */ + if (!(key->ps.spi_shader_col_format & 0xf) && + blend && blend->alpha_to_coverage) + key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR; + + /* On SI and CIK except Hawaii, the CB doesn't clamp outputs + * to the range supported by the type if a channel has less + * than 16 bits and the export format is 16_ABGR. + */ + if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII) + key->ps.color_is_int8 = sctx->framebuffer.color_is_int8; + + /* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */ + if (!key->ps.last_cbuf) { + key->ps.spi_shader_col_format &= sel->colors_written_4bit; + key->ps.color_is_int8 &= sel->info.colors_written; + } if (rs) { bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES && @@ -762,6 +859,12 @@ static void *si_create_shader_selector(struct pipe_context *ctx, } sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16; break; + + case PIPE_SHADER_FRAGMENT: + for (i = 0; i < 8; i++) + if (sel->info.colors_written & (1 << i)) + sel->colors_written_4bit |= 0xf << (4 * i); + break; } /* DB_SHADER_CONTROL */ diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index 573ab78b482..9e1e158219f 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -2062,12 +2062,12 @@ #define V_008F14_IMG_DATA_FORMAT_8_24 0x14 #define V_008F14_IMG_DATA_FORMAT_24_8 0x15 #define V_008F14_IMG_DATA_FORMAT_X24_8_32 0x16 -#define V_008F14_IMG_DATA_FORMAT_RESERVED_23 0x17 -#define V_008F14_IMG_DATA_FORMAT_RESERVED_24 0x18 -#define V_008F14_IMG_DATA_FORMAT_RESERVED_25 0x19 -#define V_008F14_IMG_DATA_FORMAT_RESERVED_26 0x1A -#define V_008F14_IMG_DATA_FORMAT_RESERVED_27 0x1B -#define V_008F14_IMG_DATA_FORMAT_RESERVED_28 0x1C +#define V_008F14_IMG_DATA_FORMAT_8_AS_8_8_8_8 0x17 /* stoney+ */ +#define V_008F14_IMG_DATA_FORMAT_ETC2_RGB 0x18 /* stoney+ */ +#define V_008F14_IMG_DATA_FORMAT_ETC2_RGBA 0x19 /* stoney+ */ +#define V_008F14_IMG_DATA_FORMAT_ETC2_R 0x1A 
/* stoney+ */ +#define V_008F14_IMG_DATA_FORMAT_ETC2_RG 0x1B /* stoney+ */ +#define V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1 0x1C /* stoney+ */ #define V_008F14_IMG_DATA_FORMAT_RESERVED_29 0x1D #define V_008F14_IMG_DATA_FORMAT_RESERVED_30 0x1E #define V_008F14_IMG_DATA_FORMAT_RESERVED_31 0x1F @@ -2081,8 +2081,8 @@ #define V_008F14_IMG_DATA_FORMAT_BC5 0x27 #define V_008F14_IMG_DATA_FORMAT_BC6 0x28 #define V_008F14_IMG_DATA_FORMAT_BC7 0x29 -#define V_008F14_IMG_DATA_FORMAT_RESERVED_42 0x2A -#define V_008F14_IMG_DATA_FORMAT_RESERVED_43 0x2B +#define V_008F14_IMG_DATA_FORMAT_16_AS_16_16_16_16 0x2A /* stoney+ */ +#define V_008F14_IMG_DATA_FORMAT_16_AS_32_32_32_32 0x2B /* stoney+ */ #define V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F1 0x2C #define V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F1 0x2D #define V_008F14_IMG_DATA_FORMAT_FMASK8_S8_F1 0x2E @@ -2100,8 +2100,8 @@ #define V_008F14_IMG_DATA_FORMAT_6_5_5 0x3A #define V_008F14_IMG_DATA_FORMAT_1 0x3B #define V_008F14_IMG_DATA_FORMAT_1_REVERSED 0x3C -#define V_008F14_IMG_DATA_FORMAT_32_AS_8 0x3D -#define V_008F14_IMG_DATA_FORMAT_32_AS_8_8 0x3E +#define V_008F14_IMG_DATA_FORMAT_32_AS_8 0x3D /* not on stoney */ +#define V_008F14_IMG_DATA_FORMAT_32_AS_8_8 0x3E /* not on stoney */ #define V_008F14_IMG_DATA_FORMAT_32_AS_32_32_32_32 0x3F #define S_008F14_NUM_FORMAT(x) (((x) & 0x0F) << 26) #define G_008F14_NUM_FORMAT(x) (((x) >> 26) & 0x0F) diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index b5ab9249835..6e703f76499 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -119,7 +119,22 @@ trace_context_draw_vbo(struct pipe_context *_pipe, trace_dump_trace_flush(); - pipe->draw_vbo(pipe, info); + if (info->indirect) { + struct pipe_draw_info *_info = NULL; + + _info = MALLOC(sizeof(*_info)); + if (!_info) + return; + + memcpy(_info, info, sizeof(*_info)); + _info->indirect = trace_resource_unwrap(tr_ctx, _info->indirect); + _info->indirect_params = 
trace_resource_unwrap(tr_ctx, + _info->indirect_params); + pipe->draw_vbo(pipe, _info); + FREE(_info); + } else { + pipe->draw_vbo(pipe, info); + } trace_dump_call_end(); } diff --git a/src/gallium/drivers/virgl/Android.mk b/src/gallium/drivers/virgl/Android.mk new file mode 100644 index 00000000000..b8309e43d71 --- /dev/null +++ b/src/gallium/drivers/virgl/Android.mk @@ -0,0 +1,35 @@ +# Copyright (C) 2014 Emil Velikov <[email protected]> +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(C_SOURCES) + +LOCAL_SHARED_LIBRARIES := libdrm +LOCAL_MODULE := libmesa_pipe_virgl + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk index 2d9610ee9ab..d4030852943 100644 --- a/src/gallium/targets/dri/Android.mk +++ b/src/gallium/targets/dri/Android.mk @@ -92,6 +92,10 @@ ifneq ($(filter vc4,$(MESA_GPU_DRIVERS)),) LOCAL_CFLAGS += -DGALLIUM_VC4 gallium_DRIVERS += libmesa_winsys_vc4 libmesa_pipe_vc4 endif +ifneq ($(filter virgl,$(MESA_GPU_DRIVERS)),) +LOCAL_CFLAGS += -DGALLIUM_VIRGL +gallium_DRIVERS += libmesa_winsys_virgl libmesa_pipe_virgl +endif ifneq ($(filter vmwgfx,$(MESA_GPU_DRIVERS)),) gallium_DRIVERS += libmesa_winsys_svga libmesa_pipe_svga LOCAL_CFLAGS += -DGALLIUM_VMWGFX @@ -100,7 +104,7 @@ ifneq ($(filter nouveau r600g,$(MESA_GPU_DRIVERS)),) LOCAL_SHARED_LIBRARIES += $(if $(filter true,$(MESA_LOLLIPOP_BUILD)),libc++,libstlport) endif -LOCAL_STATIC_LIBRARIES := \ +LOCAL_WHOLE_STATIC_LIBRARIES := \ $(gallium_DRIVERS) \ libmesa_st_dri \ libmesa_st_mesa \ @@ -112,6 +116,8 @@ LOCAL_STATIC_LIBRARIES := \ libmesa_util \ libmesa_loader \ +LOCAL_STATIC_LIBRARIES := + ifeq ($(MESA_ENABLE_LLVM),true) LOCAL_STATIC_LIBRARIES += \ libLLVMR600CodeGen \ diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 82c803b564d..30a1aa8d6ba 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -128,6 +128,11 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf) struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); int i; + pipe_mutex_lock(bo->ws->global_bo_list_lock); + LIST_DEL(&bo->global_list_item); + bo->ws->num_buffers--; + pipe_mutex_unlock(bo->ws->global_bo_list_lock); + amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, 
AMDGPU_VA_OP_UNMAP); amdgpu_va_range_free(bo->va_handle); amdgpu_bo_free(bo->bo); @@ -249,6 +254,16 @@ static const struct pb_vtbl amdgpu_winsys_bo_vtbl = { /* other functions are never called */ }; +static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo) +{ + struct amdgpu_winsys *ws = bo->ws; + + pipe_mutex_lock(ws->global_bo_list_lock); + LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list); + ws->num_buffers++; + pipe_mutex_unlock(ws->global_bo_list_lock); +} + static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, unsigned size, unsigned alignment, @@ -319,6 +334,8 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, else if (initial_domain & RADEON_DOMAIN_GTT) ws->allocated_gtt += align(size, ws->gart_page_size); + amdgpu_add_buffer_to_global_list(bo); + return bo; error_va_map: @@ -588,6 +605,8 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws, else if (bo->initial_domain & RADEON_DOMAIN_GTT) ws->allocated_gtt += align(bo->base.size, ws->gart_page_size); + amdgpu_add_buffer_to_global_list(bo); + return &bo->base; error_va_map: @@ -673,6 +692,8 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws, ws->allocated_gtt += align(bo->base.size, ws->gart_page_size); + amdgpu_add_buffer_to_global_list(bo); + return (struct pb_buffer*)bo; error_va_map: diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h index 12cb920b387..54f5dbdc459 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h @@ -60,6 +60,8 @@ struct amdgpu_winsys_bo { /* Fences for buffer synchronization. 
*/ struct pipe_fence_handle *fence[RING_LAST]; + + struct list_head global_list_item; }; bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 10f112d01b3..83da740f649 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -605,6 +605,7 @@ static void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs) } DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", FALSE) static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, @@ -644,9 +645,35 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs, if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) { int r; - r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, - cs->handles, cs->flags, - &cs->request.resources); + /* Use a buffer list containing all allocated buffers if requested. */ + if (debug_get_option_all_bos()) { + struct amdgpu_winsys_bo *bo; + amdgpu_bo_handle *handles; + unsigned num = 0; + + pipe_mutex_lock(ws->global_bo_list_lock); + + handles = malloc(sizeof(handles[0]) * ws->num_buffers); + if (!handles) { + pipe_mutex_unlock(ws->global_bo_list_lock); + goto cleanup; + } + + LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) { + assert(num < ws->num_buffers); + handles[num++] = bo->bo; + } + + r = amdgpu_bo_list_create(ws->dev, ws->num_buffers, + handles, NULL, + &cs->request.resources); + free(handles); + pipe_mutex_unlock(ws->global_bo_list_lock); + } else { + r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, + cs->handles, cs->flags, + &cs->request.resources); + } if (r) { fprintf(stderr, "amdgpu: resource list creation failed (%d)\n", r); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index 39d3aa4f783..7393a1d1eb4 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -266,17 +266,12 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws) ws->info.r600_virtual_address = TRUE; ws->info.r600_has_dma = dma.available_rings != 0; - /* Guess what the maximum compute unit number is by looking at the mask - * of enabled CUs. - */ + /* Get the number of good compute units. */ + ws->info.num_good_compute_units = 0; for (i = 0; i < ws->info.max_se; i++) - for (j = 0; j < ws->info.max_sh_per_se; j++) { - unsigned max = util_last_bit(ws->amdinfo.cu_bitmap[i][j]); - - if (ws->info.max_compute_units < max) - ws->info.max_compute_units = max; - } - ws->info.max_compute_units *= ws->info.max_se * ws->info.max_sh_per_se; + for (j = 0; j < ws->info.max_sh_per_se; j++) + ws->info.num_good_compute_units += + util_bitcount(ws->amdinfo.cu_bitmap[i][j]); memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode, sizeof(ws->amdinfo.gb_tile_mode)); @@ -305,6 +300,7 @@ static void amdgpu_winsys_destroy(struct radeon_winsys *rws) pipe_mutex_destroy(ws->bo_fence_lock); pb_cache_deinit(&ws->bo_cache); + pipe_mutex_destroy(ws->global_bo_list_lock); AddrDestroy(ws->addrlib); amdgpu_device_deinitialize(ws->dev); FREE(rws); @@ -477,6 +473,8 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create) amdgpu_cs_init_functions(ws); amdgpu_surface_init_functions(ws); + LIST_INITHEAD(&ws->global_bo_list); + pipe_mutex_init(ws->global_bo_list_lock); pipe_mutex_init(ws->bo_fence_lock); /* Create the screen at the end. 
The winsys must be initialized diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h index 615f55411f8..91b9be4bb32 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h @@ -63,6 +63,11 @@ struct amdgpu_winsys { ADDR_HANDLE addrlib; uint32_t rev_id; unsigned family; + + /* List of all allocated buffers */ + pipe_mutex global_bo_list_lock; + struct list_head global_bo_list; + unsigned num_buffers; }; static inline struct amdgpu_winsys * diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index c7e058bf3da..8a1ed3ae08c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -419,9 +419,9 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) &ws->info.r600_max_pipes); /* All GPUs have at least one compute unit */ - ws->info.max_compute_units = 1; + ws->info.num_good_compute_units = 1; radeon_get_drm_value(ws->fd, RADEON_INFO_ACTIVE_CU_COUNT, NULL, - &ws->info.max_compute_units); + &ws->info.num_good_compute_units); radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL, &ws->info.max_se); diff --git a/src/gallium/winsys/virgl/drm/Android.mk b/src/gallium/winsys/virgl/drm/Android.mk new file mode 100644 index 00000000000..849350343f6 --- /dev/null +++ b/src/gallium/winsys/virgl/drm/Android.mk @@ -0,0 +1,34 @@ +# Copyright (C) 2014 Emil Velikov <[email protected]> +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above 
copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_SHARED_LIBRARIES := libdrm +LOCAL_MODULE := libmesa_winsys_virgl + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index f2e2165e8c3..95e86df1cdd 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -3267,7 +3267,7 @@ builtin_builder::_atan2(const glsl_type *type) ir_factory outer_then(&outer_if->then_instructions, mem_ctx); /* Then...call atan(y/x) */ - do_atan(body, glsl_type::float_type, r, div(y, x)); + do_atan(outer_then, glsl_type::float_type, r, div(y, x)); /* ...and fix it up: */ ir_if *inner_if = new(mem_ctx) ir_if(less(x, imm(0.0f))); diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp index 221aab0043b..ccc04c00cea 100644 --- a/src/glsl/builtin_variables.cpp +++ b/src/glsl/builtin_variables.cpp @@ -667,7 +667,7 @@ builtin_variable_generator::generate_constants() add_const("gl_MaxVaryingComponents", state->ctx->Const.MaxVarying * 4); } - if (state->is_version(150, 0)) { + if (state->has_geometry_shader()) { add_const("gl_MaxVertexOutputComponents", state->Const.MaxVertexOutputComponents); add_const("gl_MaxGeometryInputComponents", @@ -730,12 +730,11 @@ 
builtin_variable_generator::generate_constants() add_const("gl_MaxAtomicCounterBindings", state->Const.MaxAtomicBufferBindings); - /* When Mesa adds support for GL_OES_geometry_shader and - * GL_OES_tessellation_shader, this will need to change. - */ - if (!state->es_shader) { + if (state->has_geometry_shader()) { add_const("gl_MaxGeometryAtomicCounters", state->Const.MaxGeometryAtomicCounters); + } + if (!state->es_shader) { add_const("gl_MaxTessControlAtomicCounters", state->Const.MaxTessControlAtomicCounters); add_const("gl_MaxTessEvaluationAtomicCounters", @@ -753,12 +752,11 @@ builtin_variable_generator::generate_constants() add_const("gl_MaxAtomicCounterBufferSize", state->Const.MaxAtomicCounterBufferSize); - /* When Mesa adds support for GL_OES_geometry_shader and - * GL_OES_tessellation_shader, this will need to change. - */ - if (!state->es_shader) { + if (state->has_geometry_shader()) { add_const("gl_MaxGeometryAtomicCounterBuffers", state->Const.MaxGeometryAtomicCounterBuffers); + } + if (!state->es_shader) { add_const("gl_MaxTessControlAtomicCounterBuffers", state->Const.MaxTessControlAtomicCounterBuffers); add_const("gl_MaxTessEvaluationAtomicCounterBuffers", @@ -814,13 +812,16 @@ builtin_variable_generator::generate_constants() add_const("gl_MaxCombinedImageUniforms", state->Const.MaxCombinedImageUniforms); + if (state->has_geometry_shader()) { + add_const("gl_MaxGeometryImageUniforms", + state->Const.MaxGeometryImageUniforms); + } + if (!state->es_shader) { add_const("gl_MaxCombinedImageUnitsAndFragmentOutputs", state->Const.MaxCombinedShaderOutputResources); add_const("gl_MaxImageSamples", state->Const.MaxImageSamples); - add_const("gl_MaxGeometryImageUniforms", - state->Const.MaxGeometryImageUniforms); } if (state->is_version(450, 310)) { @@ -1070,7 +1071,7 @@ builtin_variable_generator::generate_fs_special_vars() if (state->is_version(120, 100)) add_input(VARYING_SLOT_PNTC, vec2_t, "gl_PointCoord"); - if (state->is_version(150, 0)) { + if 
(state->has_geometry_shader()) { var = add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); var->data.interpolation = INTERP_QUALIFIER_FLAT; } diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index 10198758944..2109fb2eedd 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -1270,7 +1270,7 @@ layout_qualifier_id: } } - if ($$.flags.i && !state->is_version(150, 0)) { + if ($$.flags.i && !state->has_geometry_shader()) { _mesa_glsl_error(& @1, state, "#version 150 layout " "qualifier `%s' used", $1); } @@ -1507,7 +1507,7 @@ layout_qualifier_id: if (match_layout_qualifier("max_vertices", $1, state) == 0) { $$.flags.q.max_vertices = 1; $$.max_vertices = new(ctx) ast_layout_expression(@1, $3); - if (!state->is_version(150, 0)) { + if (!state->has_geometry_shader()) { _mesa_glsl_error(& @3, state, "#version 150 max_vertices qualifier " "specified", $3); diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 131a5641f8f..ecf0d7f76e5 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -600,6 +600,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { /* OES extensions go here, sorted alphabetically. 
*/ EXT(OES_EGL_image_external, false, true, OES_EGL_image_external), + EXT(OES_geometry_shader, false, true, OES_geometry_shader), EXT(OES_standard_derivatives, false, true, OES_standard_derivatives), EXT(OES_texture_3D, false, true, dummy_true), EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample), diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index ecc29920918..3f88e01d599 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -265,6 +265,11 @@ struct _mesa_glsl_parse_state { return ARB_compute_shader_enable || is_version(430, 310); } + bool has_geometry_shader() const + { + return OES_geometry_shader_enable || is_version(150, 320); + } + void process_version_directive(YYLTYPE *locp, int version, const char *ident); @@ -586,6 +591,8 @@ struct _mesa_glsl_parse_state { */ bool OES_EGL_image_external_enable; bool OES_EGL_image_external_warn; + bool OES_geometry_shader_enable; + bool OES_geometry_shader_warn; bool OES_standard_derivatives_enable; bool OES_standard_derivatives_warn; bool OES_texture_3D_enable; diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp index 09f80d0f39d..264b69ca619 100644 --- a/src/glsl/link_varyings.cpp +++ b/src/glsl/link_varyings.cpp @@ -1001,23 +1001,20 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) const ir_variable *const var = (producer_var != NULL) ? producer_var : consumer_var; + const gl_shader_stage stage = (producer_var != NULL) + ? producer_stage : consumer_stage; + const glsl_type *type = get_varying_type(var, stage); this->matches[this->num_matches].packing_class = this->compute_packing_class(var); this->matches[this->num_matches].packing_order = this->compute_packing_order(var); if (this->disable_varying_packing) { - unsigned slots; - gl_shader_stage stage = - (producer_var != NULL) ? 
producer_stage : consumer_stage; - - const glsl_type *type = get_varying_type(var, stage); - - slots = type->count_attribute_slots(false); + unsigned slots = type->count_attribute_slots(false); this->matches[this->num_matches].num_components = slots * 4; } else { this->matches[this->num_matches].num_components - = var->type->component_slots(); + = type->component_slots(); } this->matches[this->num_matches].producer_var = producer_var; this->matches[this->num_matches].consumer_var = consumer_var; diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index d140be346cf..7c2d4d7ce51 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -470,8 +470,8 @@ lower_instructions_visitor::dldexp_to_arith(ir_expression *ir) ir_constant *sign_mask = new(ir) ir_constant(0x80000000u); - ir_constant *exp_shift = new(ir) ir_constant(20, vec_elem); - ir_constant *exp_width = new(ir) ir_constant(11, vec_elem); + ir_constant *exp_shift = new(ir) ir_constant(20u); + ir_constant *exp_width = new(ir) ir_constant(11u); ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem); /* Temporary variables */ diff --git a/src/loader/pci_id_driver_map.h b/src/loader/pci_id_driver_map.h index 11e39d3a206..cab69fb87f4 100644 --- a/src/loader/pci_id_driver_map.h +++ b/src/loader/pci_id_driver_map.h @@ -53,6 +53,12 @@ static const int radeonsi_chip_ids[] = { #undef CHIPSET }; +static const int virtio_gpu_chip_ids[] = { +#define CHIPSET(chip, name, family) chip, +#include "pci_ids/virtio_gpu_pci_ids.h" +#undef CHIPSET +}; + static const int vmwgfx_chip_ids[] = { #define CHIPSET(chip, name, family) chip, #include "pci_ids/vmwgfx_pci_ids.h" @@ -78,6 +84,7 @@ static const struct { { 0x1002, "radeonsi", radeonsi_chip_ids, ARRAY_SIZE(radeonsi_chip_ids), _LOADER_GALLIUM}, { 0x10de, "nouveau_vieux", NULL, -1, _LOADER_DRI, is_nouveau_vieux }, { 0x10de, "nouveau", NULL, -1, _LOADER_GALLIUM }, + { 0x1af4, "virtio_gpu", virtio_gpu_chip_ids, 
ARRAY_SIZE(virtio_gpu_chip_ids), _LOADER_GALLIUM }, { 0x15ad, "vmwgfx", vmwgfx_chip_ids, ARRAY_SIZE(vmwgfx_chip_ids), _LOADER_GALLIUM }, { 0x0000, NULL, NULL, 0 }, }; diff --git a/src/mapi/glapi/gen/apiexec.py b/src/mapi/glapi/gen/apiexec.py index 58ec08be466..2a8043264eb 100644 --- a/src/mapi/glapi/gen/apiexec.py +++ b/src/mapi/glapi/gen/apiexec.py @@ -70,9 +70,8 @@ functions = { # extension with core profile. "TexBuffer": exec_info(core=31), - # OpenGL 3.2 / GL_ARB_geometry_shader4. Mesa does not support - # GL_ARB_geometry_shader4, so OpenGL 3.2 is required. - "FramebufferTexture": exec_info(core=32), + # OpenGL 3.2 / GL_OES_geometry_shader. + "FramebufferTexture": exec_info(core=32, es2=31), # OpenGL 4.0 / GL_ARB_shader_subroutines. Mesa only exposes this # extension with core profile. diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index 577d8254c43..86df980304b 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -940,4 +940,47 @@ </function> </category> + +<!-- 175. 
GL_OES_geometry_shader --> +<category name="GL_OES_geometry_shader" number="210"> + <enum name="GEOMETRY_SHADER_OES" value="0x8DD9"/> + <enum name="GEOMETRY_SHADER_BIT_OES" value="0x00000004"/> + <enum name="GEOMETRY_LINKED_VERTICES_OUT_OES" value="0x8916"/> + <enum name="GEOMETRY_LINKED_INPUT_TYPE_OES" value="0x8917"/> + <enum name="GEOMETRY_LINKED_OUTPUT_TYPE_OES" value="0x8918"/> + <enum name="GEOMETRY_SHADER_INVOCATIONS_OES" value="0x887F"/> + <enum name="LAYER_PROVOKING_VERTEX_OES" value="0x825E"/> + <enum name="MAX_GEOMETRY_UNIFORM_BLOCKS_OES" value="0x8A2C"/> + <enum name="MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS_OES" value="0x8A32"/> + <enum name="MAX_GEOMETRY_INPUT_COMPONENTS_OES" value="0x9123"/> + <enum name="MAX_GEOMETRY_OUTPUT_COMPONENTS_OES" value="0x9124"/> + <enum name="MAX_GEOMETRY_OUTPUT_VERTICES_OES" value="0x8DE0"/> + <enum name="MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS_OES" value="0x8DE1"/> + <enum name="MAX_GEOMETRY_SHADER_INVOCATIONS_OES" value="0x8E5A"/> + <enum name="MAX_GEOMETRY_TEXTURE_IMAGE_UNITS_OES" value="0x8C29"/> + <enum name="MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS_OES" value="0x92CF"/> + <enum name="MAX_GEOMETRY_ATOMIC_COUNTERS_OES" value="0x92D5"/> + <enum name="MAX_GEOMETRY_IMAGE_UNIFORMS_OES" value="0x90CD"/> + <enum name="MAX_GEOMETRY_SHADER_STORAGE_BLOCKS_OES" value="0x90D7"/> + <enum name="FIRST_VERTEX_CONVENTION_OES" value="0x8E4D"/> + <enum name="LAST_VERTEX_CONVENTION_OES" value="0x8E4E"/> + <enum name="UNDEFINED_VERTEX_OES" value="0x8260"/> + <enum name="PRIMITIVES_GENERATED_OES" value="0x8C87"/> + <enum name="LINES_ADJACENCY_OES" value="0xA"/> + <enum name="LINE_STRIP_ADJACENCY_OES" value="0xB"/> + <enum name="TRIANGLES_ADJACENCY_OES" value="0xC"/> + <enum name="TRIANGLE_STRIP_ADJACENCY_OES" value="0xD"/> + <enum name="FRAMEBUFFER_DEFAULT_LAYERS_OES" value="0x9312"/> + <enum name="MAX_FRAMEBUFFER_LAYERS_OES" value="0x9317"/> + <enum name="FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS_OES" value="0x8DA8"/> + <enum 
name="FRAMEBUFFER_ATTACHMENT_LAYERED_OES" value="0x8DA7"/> + <enum name="REFERENCED_BY_GEOMETRY_SHADER_OES" value="0x9309"/> + + <function name="FramebufferTextureOES" alias="FramebufferTexture" es2="3.1"> + <param name="target" type="GLenum"/> + <param name="attachment" type="GLenum"/> + <param name="texture" type="GLuint"/> + <param name="level" type="GLint"/> + </function> + </category> </OpenGLAPI> diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 1ed0e4d1f59..5f2e79637c2 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -3179,8 +3179,10 @@ decompress_texture_image(struct gl_context *ctx, /* restrict sampling to the texture level of interest */ if (target != GL_TEXTURE_RECTANGLE_ARB) { - _mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, texImage->Level); - _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, texImage->Level); + _mesa_texture_parameteriv(ctx, texObj, GL_TEXTURE_BASE_LEVEL, + (GLint *) &texImage->Level, false); + _mesa_texture_parameteriv(ctx, texObj, GL_TEXTURE_MAX_LEVEL, + (GLint *) &texImage->Level, false); } /* render quad w/ texture into renderbuffer */ @@ -3190,8 +3192,10 @@ decompress_texture_image(struct gl_context *ctx, * be restored by _mesa_meta_end(). 
*/ if (target != GL_TEXTURE_RECTANGLE_ARB) { - _mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, baseLevelSave); - _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, maxLevelSave); + _mesa_texture_parameteriv(ctx, texObj, GL_TEXTURE_BASE_LEVEL, + &baseLevelSave, false); + _mesa_texture_parameteriv(ctx, texObj, GL_TEXTURE_MAX_LEVEL, + &maxLevelSave, false); } } diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h index 3691e7d6123..074f70da6cd 100644 --- a/src/mesa/drivers/common/meta.h +++ b/src/mesa/drivers/common/meta.h @@ -469,7 +469,7 @@ _mesa_meta_bind_rb_as_tex_image(struct gl_context *ctx, struct gl_sampler_object * _mesa_meta_setup_sampler(struct gl_context *ctx, - const struct gl_texture_object *texObj, + struct gl_texture_object *texObj, GLenum target, GLenum filter, GLuint srcLevel); extern GLbitfield diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c index b414dce8819..5d80f7d8fe1 100644 --- a/src/mesa/drivers/common/meta_blit.c +++ b/src/mesa/drivers/common/meta_blit.c @@ -828,22 +828,29 @@ void _mesa_meta_fb_tex_blit_end(struct gl_context *ctx, GLenum target, struct fb_tex_blit_state *blit) { + struct gl_texture_object *const texObj = + _mesa_get_current_tex_object(ctx, target); + /* Restore texture object state, the texture binding will * be restored by _mesa_meta_end(). */ if (target != GL_TEXTURE_RECTANGLE_ARB) { - _mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, blit->baseLevelSave); - _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, blit->maxLevelSave); + _mesa_texture_parameteriv(ctx, texObj, GL_TEXTURE_BASE_LEVEL, + &blit->baseLevelSave, false); + _mesa_texture_parameteriv(ctx, texObj, GL_TEXTURE_MAX_LEVEL, + &blit->maxLevelSave, false); + } - if (ctx->Extensions.ARB_stencil_texturing) { - const struct gl_texture_object *texObj = - _mesa_get_current_tex_object(ctx, target); + /* If ARB_stencil_texturing is not supported, the mode won't have changed. 
*/ + if (texObj->StencilSampling != blit->stencilSamplingSave) { + /* GLint so the compiler won't complain about type signedness mismatch + * in the call to _mesa_texture_parameteriv below. + */ + const GLint param = blit->stencilSamplingSave ? + GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; - if (texObj->StencilSampling != blit->stencilSamplingSave) - _mesa_TexParameteri(target, GL_DEPTH_STENCIL_TEXTURE_MODE, - blit->stencilSamplingSave ? - GL_STENCIL_INDEX : GL_DEPTH_COMPONENT); - } + _mesa_texture_parameteriv(ctx, texObj, GL_DEPTH_STENCIL_TEXTURE_MODE, + &param, false); } _mesa_bind_sampler(ctx, ctx->Texture.CurrentUnit, blit->samp_obj_save); @@ -895,7 +902,7 @@ _mesa_meta_bind_rb_as_tex_image(struct gl_context *ctx, struct gl_sampler_object * _mesa_meta_setup_sampler(struct gl_context *ctx, - const struct gl_texture_object *texObj, + struct gl_texture_object *texObj, GLenum target, GLenum filter, GLuint srcLevel) { struct gl_sampler_object *samp_obj; @@ -915,8 +922,10 @@ _mesa_meta_setup_sampler(struct gl_context *ctx, /* Prepare src texture state */ _mesa_BindTexture(target, texObj->Name); if (target != GL_TEXTURE_RECTANGLE_ARB) { - _mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, srcLevel); - _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, srcLevel); + _mesa_texture_parameteriv(ctx, texObj, GL_TEXTURE_BASE_LEVEL, + (GLint *) &srcLevel, false); + _mesa_texture_parameteriv(ctx, texObj, GL_TEXTURE_MAX_LEVEL, + (GLint *) &srcLevel, false); } return samp_obj; diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c index 04b9cafe308..2c2b7ba6bf8 100644 --- a/src/mesa/drivers/common/meta_copy_image.c +++ b/src/mesa/drivers/common/meta_copy_image.c @@ -235,7 +235,7 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, } if (src_view_tex_image) { - /* Prever the tex image because, even if we have a renderbuffer, we may + /* Prefer the tex image because, even if we have a renderbuffer, we may * have had to wrap it in a 
texture view. */ _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, attachment, diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c index f20fcac68d6..27435b2b722 100644 --- a/src/mesa/drivers/common/meta_generate_mipmap.c +++ b/src/mesa/drivers/common/meta_generate_mipmap.c @@ -185,6 +185,12 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, GLint swizzle[4]; GLboolean swizzleSaved = GL_FALSE; + /* GLint so the compiler won't complain about type signedness mismatch in + * the calls to _mesa_texture_parameteriv below. + */ + static const GLint always_false = GL_FALSE; + static const GLint always_true = GL_TRUE; + if (fallback_required(ctx, target, texObj)) { _mesa_generate_mipmap(ctx, target, texObj); return; @@ -248,13 +254,14 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, assert(mipmap->FBO != 0); _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, mipmap->FBO); - _mesa_TexParameteri(target, GL_GENERATE_MIPMAP, GL_FALSE); + _mesa_texture_parameteriv(ctx, texObj, GL_GENERATE_MIPMAP, &always_false, false); if (texObj->_Swizzle != SWIZZLE_NOOP) { static const GLint swizzleNoop[4] = { GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA }; memcpy(swizzle, texObj->Swizzle, sizeof(swizzle)); swizzleSaved = GL_TRUE; - _mesa_TexParameteriv(target, GL_TEXTURE_SWIZZLE_RGBA, swizzleNoop); + _mesa_texture_parameteriv(ctx, texObj, GL_TEXTURE_SWIZZLE_RGBA, + swizzleNoop, false); } /* Silence valgrind warnings about reading uninitialized stack. 
*/ @@ -309,7 +316,8 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, /* Allocate storage for the destination mipmap image(s) */ /* Set MaxLevel large enough to hold the new level when we allocate it */ - _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, dstLevel); + _mesa_texture_parameteriv(ctx, texObj, GL_TEXTURE_MAX_LEVEL, + (GLint *) &dstLevel, false); if (!prepare_mipmap_level(ctx, texObj, dstLevel, dstWidth, dstHeight, dstDepth, @@ -323,7 +331,8 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, dstImage = _mesa_select_tex_image(texObj, faceTarget, dstLevel); /* limit minification to src level */ - _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, srcLevel); + _mesa_texture_parameteriv(ctx, texObj, GL_TEXTURE_MAX_LEVEL, + (GLint *) &srcLevel, false); /* setup viewport */ _mesa_set_viewport(ctx, 0, 0, 0, dstWidth, dstHeight); @@ -373,9 +382,12 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, _mesa_meta_end(ctx); - _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, maxLevelSave); + _mesa_texture_parameteriv(ctx, texObj, GL_TEXTURE_MAX_LEVEL, &maxLevelSave, + false); if (genMipmapSave) - _mesa_TexParameteri(target, GL_GENERATE_MIPMAP, genMipmapSave); + _mesa_texture_parameteriv(ctx, texObj, GL_GENERATE_MIPMAP, &always_true, + false); if (swizzleSaved) - _mesa_TexParameteriv(target, GL_TEXTURE_SWIZZLE_RGBA, swizzle); + _mesa_texture_parameteriv(ctx, texObj, GL_TEXTURE_SWIZZLE_RGBA, swizzle, + false); } diff --git a/src/mesa/drivers/dri/common/drirc b/src/mesa/drivers/dri/common/drirc index e1874c3f1dc..183a1dcabe8 100644 --- a/src/mesa/drivers/dri/common/drirc +++ b/src/mesa/drivers/dri/common/drirc @@ -37,26 +37,26 @@ TODO: document the other workarounds. 
<application name="Unigine Heaven (32-bit)" executable="heaven_x86"> <option name="allow_glsl_extension_directive_midshader" value="true" /> - <!-- remove disable_blend_func_extended if 4.1 ever comes out --> - <option name="disable_blend_func_extended" value="true" /> + <!-- remove dual_color_blend_by_location if 4.1 ever comes out --> + <option name="dual_color_blend_by_location" value="true" /> </application> <application name="Unigine Heaven (64-bit)" executable="heaven_x64"> <option name="allow_glsl_extension_directive_midshader" value="true" /> - <!-- remove disable_blend_func_extended if 4.1 ever comes out --> - <option name="disable_blend_func_extended" value="true" /> + <!-- remove dual_color_blend_by_location if 4.1 ever comes out --> + <option name="dual_color_blend_by_location" value="true" /> </application> <application name="Unigine Valley (32-bit)" executable="valley_x86"> <option name="allow_glsl_extension_directive_midshader" value="true" /> - <!-- remove disable_blend_func_extended if 1.1 ever comes out --> - <option name="disable_blend_func_extended" value="true" /> + <!-- remove dual_color_blend_by_location if 1.1 ever comes out --> + <option name="dual_color_blend_by_location" value="true" /> </application> <application name="Unigine Valley (64-bit)" executable="valley_x64"> <option name="allow_glsl_extension_directive_midshader" value="true" /> - <!-- remove disable_blend_func_extended if 1.1 ever comes out --> - <option name="disable_blend_func_extended" value="true" /> + <!-- remove dual_color_blend_by_location if 1.1 ever comes out --> + <option name="dual_color_blend_by_location" value="true" /> </application> <application name="Unigine OilRush (32-bit)" executable="OilRush_x86"> diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h index 4e5a7217ee2..55e926b239e 100644 --- a/src/mesa/drivers/dri/common/xmlpool/t_options.h +++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h @@ 
-90,6 +90,11 @@ DRI_CONF_OPT_BEGIN_B(disable_blend_func_extended, def) \ DRI_CONF_DESC(en,gettext("Disable dual source blending")) \ DRI_CONF_OPT_END +#define DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION(def) \ +DRI_CONF_OPT_BEGIN_B(dual_color_blend_by_location, def) \ + DRI_CONF_DESC(en,gettext("Identify dual color blending sources by location rather than index")) \ +DRI_CONF_OPT_END + #define DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS(def) \ DRI_CONF_OPT_BEGIN_B(disable_glsl_line_continuations, def) \ DRI_CONF_DESC(en,gettext("Disable backslash-based line continuations in GLSL source")) \ diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c index 3f429f25d10..e5a3f003eac 100644 --- a/src/mesa/drivers/dri/i915/intel_context.c +++ b/src/mesa/drivers/dri/i915/intel_context.c @@ -427,6 +427,8 @@ intelInitContext(struct intel_context *intel, return false; } + driContextSetFlags(&intel->ctx, flags); + driContextPriv->driverPrivate = intel; intel->driContext = driContextPriv; diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index 62dcb4dad84..cd28bbb6bbf 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -249,6 +249,7 @@ struct brw_wm_prog_key { bool compute_sample_id:1; unsigned line_aa:2; bool high_quality_derivatives:1; + bool force_dual_color_blend:1; uint16_t drawable_height; uint64_t input_slots_valid; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 9ba33396d36..1032e5a8175 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -750,6 +750,9 @@ brw_process_driconf_options(struct brw_context *brw) ctx->Const.AllowGLSLExtensionDirectiveMidShader = driQueryOptionb(options, "allow_glsl_extension_directive_midshader"); + + brw->dual_color_blend_by_location = + driQueryOptionb(options, "dual_color_blend_by_location"); } GLboolean diff 
--git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 2a29dfe5eec..55d6723eab7 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -836,6 +836,7 @@ struct brw_context bool always_flush_cache; bool disable_throttling; bool precompile; + bool dual_color_blend_by_location; driOptionCache optionCache; /** @} */ diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c index e8af70cc571..05872255865 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@ -390,7 +390,7 @@ static const struct brw_device_info brw_device_info_bxt = { .max_hs_threads = 56, .max_ds_threads = 56, .max_gs_threads = 56, - .max_wm_threads = 32, + .max_wm_threads = 64 * 3, .max_cs_threads = 28, .urb = { .size = 64, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 1ba5075731a..922f7200a30 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1671,12 +1671,6 @@ fs_visitor::assign_vs_urb_setup() brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data; assert(stage == MESA_SHADER_VERTEX); - int count = _mesa_bitcount_64(vs_prog_data->inputs_read); - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid || - vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) - count++; - if (vs_prog_data->uses_drawid) - count++; /* Each attribute is 4 regs. 
*/ this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 11e7c7dc102..48cdaf6d9c2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -130,7 +130,11 @@ fs_visitor::nir_setup_outputs() break; } case MESA_SHADER_FRAGMENT: - if (var->data.index > 0) { + if (key->force_dual_color_blend && + var->data.location == FRAG_RESULT_DATA1) { + this->dual_src_output = reg; + this->do_dual_src = true; + } else if (var->data.index > 0) { assert(var->data.location == FRAG_RESULT_DATA0); assert(var->data.index == 1); this->dual_src_output = reg; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index 3d83152d365..b7d02e90a86 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -544,7 +544,7 @@ brw_compile_tcs(const struct brw_compiler *compiler, assert(output_size_bytes >= 1); if (output_size_bytes > GEN7_MAX_HS_URB_ENTRY_SIZE_BYTES) - return false; + return NULL; /* URB entry sizes are stored as a multiple of 64 bytes. 
*/ vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64; diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 39d644ea63a..78846dc3790 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -515,6 +515,10 @@ brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key) /* _NEW_BUFFERS */ key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; + /* _NEW_COLOR */ + key->force_dual_color_blend = brw->dual_color_blend_by_location && + (ctx->Color.BlendEnabled & 1) && ctx->Color.Blend[0]._UsesDualSrc; + /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */ key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 && (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled); diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index e1e1e62d26a..bca783aea86 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -79,6 +79,7 @@ DRI_CONF_BEGIN DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN("false") DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS("false") DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false") + DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION("false") DRI_CONF_ALLOW_GLSL_EXTENSION_DIRECTIVE_MIDSHADER("false") DRI_CONF_OPT_BEGIN_B(shader_precompile, "true") diff --git a/src/mesa/main/context.h b/src/mesa/main/context.h index 8b64f452572..46444d2c427 100644 --- a/src/mesa/main/context.h +++ b/src/mesa/main/context.h @@ -330,7 +330,8 @@ _mesa_is_gles31(const struct gl_context *ctx) static inline bool _mesa_has_geometry_shaders(const struct gl_context *ctx) { - return _mesa_is_desktop_gl(ctx) && ctx->Version >= 32; + return _mesa_has_OES_geometry_shader(ctx) || + (_mesa_is_desktop_gl(ctx) && ctx->Version >= 32); } diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index a8a667e3c12..f7941817845 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -39,7 +39,6 @@ 
#include "enums.h" #include "api_arrayelt.h" #include "texstate.h" -#include "drivers/common/meta.h" diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index 9cec1762dbe..11f4482f8d2 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -311,6 +311,7 @@ EXT(OES_element_index_uint , dummy_true EXT(OES_fbo_render_mipmap , dummy_true , x , x , ES1, ES2, 2005) EXT(OES_fixed_point , dummy_true , x , x , ES1, x , 2002) EXT(OES_framebuffer_object , dummy_true , x , x , ES1, x , 2005) +EXT(OES_geometry_shader , OES_geometry_shader , x , x , x , 31, 2015) EXT(OES_get_program_binary , dummy_true , x , x , x , ES2, 2008) EXT(OES_mapbuffer , dummy_true , x , x , ES1, ES2, 2005) EXT(OES_packed_depth_stencil , dummy_true , x , x , ES1, ES2, 2007) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 3a0b89f4572..c5400ab1a7e 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3910,6 +3910,7 @@ struct gl_extensions GLboolean OES_texture_half_float; GLboolean OES_texture_half_float_linear; GLboolean OES_compressed_ETC1_RGB8_texture; + GLboolean OES_geometry_shader; GLboolean extension_sentinel; /** The extension string */ const GLubyte *String; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 5854369a28c..a988f41697b 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -1374,26 +1374,10 @@ _mesa_DetachShader(GLuint program, GLuint shader) void GLAPIENTRY _mesa_GetAttachedObjectsARB(GLhandleARB container, GLsizei maxCount, - GLsizei * count, GLhandleARB * objARB) + GLsizei * count, GLhandleARB * obj) { - int i; - GLuint *obj; - GET_CURRENT_CONTEXT(ctx); - - obj = calloc(maxCount, sizeof(GLuint)); - if (!obj) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetAttachedObjectsARB"); - return; - } - get_attached_shaders(ctx, container, maxCount, count, obj); - - for (i = 0 ; i < *count; i++) { - objARB[i] = (GLhandleARB)obj[i]; - } - - free(obj); } diff 
--git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index eb1108124e9..e6412962251 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -2527,5 +2527,8 @@ const struct function gles31_functions_possible[] = { /* GL_EXT_blend_func_extended */ { "glGetProgramResourceLocationIndexEXT", 31, -1 }, + /* GL_OES_geometry_shader */ + { "glFramebufferTextureOES", 31, -1}, + { NULL, 0, -1 }, }; |