summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
author: Jason Ekstrand <[email protected]> 2016-02-05 14:21:13 -0800
committer: Jason Ekstrand <[email protected]> 2016-02-05 14:21:13 -0800
commit: 9645b8eb1f1b79e537ad8ddb683507df7bc9da58 (patch)
tree: 8e554a43a136b5f7951ff8734d42deb5e81c262b /src/gallium
parent: 3eebf3686be3de10cbeda8acd884e82df3e1438a (diff)
parent: 41875ac4edd8c884225c44c0840bd20291b410ca (diff)
Merge branch mesa-public/master into vulkan
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/Android.mk | 5
-rw-r--r-- src/gallium/drivers/ddebug/dd_draw.c | 6
-rw-r--r-- src/gallium/drivers/ddebug/dd_pipe.h | 1
-rw-r--r-- src/gallium/drivers/ddebug/dd_screen.c | 14
-rw-r--r-- src/gallium/drivers/ddebug/dd_util.h | 5
-rw-r--r-- src/gallium/drivers/radeon/r600_pipe_common.c | 4
-rw-r--r-- src/gallium/drivers/radeon/r600_pipe_common.h | 5
-rw-r--r-- src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 9
-rw-r--r-- src/gallium/drivers/radeon/radeon_winsys.h | 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_blit.c | 8
-rw-r--r-- src/gallium/drivers/radeonsi/si_compute.c | 4
-rw-r--r-- src/gallium/drivers/radeonsi/si_debug.c | 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_descriptors.c | 41
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.c | 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h | 6
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c | 271
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.h | 8
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.c | 290
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.h | 6
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c | 199
-rw-r--r-- src/gallium/drivers/radeonsi/sid.h | 20
-rw-r--r-- src/gallium/drivers/trace/tr_context.c | 17
-rw-r--r-- src/gallium/drivers/virgl/Android.mk | 35
-rw-r--r-- src/gallium/targets/dri/Android.mk | 8
-rw-r--r-- src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 21
-rw-r--r-- src/gallium/winsys/amdgpu/drm/amdgpu_bo.h | 2
-rw-r--r-- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 33
-rw-r--r-- src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 18
-rw-r--r-- src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 5
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 4
-rw-r--r-- src/gallium/winsys/virgl/drm/Android.mk | 34
31 files changed, 867 insertions, 218 deletions
diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
index b406d4a5480..749be7dfeb9 100644
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -83,6 +83,11 @@ ifneq ($(filter vc4, $(MESA_GPU_DRIVERS)),)
SUBDIRS += winsys/vc4/drm drivers/vc4
endif
+# virgl
+ifneq ($(filter virgl, $(MESA_GPU_DRIVERS)),)
+SUBDIRS += winsys/virgl/drm drivers/virgl
+endif
+
# vmwgfx
ifneq ($(filter vmwgfx, $(MESA_GPU_DRIVERS)),)
SUBDIRS += winsys/svga/drm drivers/svga
diff --git a/src/gallium/drivers/ddebug/dd_draw.c b/src/gallium/drivers/ddebug/dd_draw.c
index 0d7ee9a1686..45e4e10d500 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -88,8 +88,9 @@ struct dd_call
static FILE *
dd_get_file_stream(struct dd_context *dctx)
{
+ struct dd_screen *dscreen = dd_screen(dctx->base.screen);
struct pipe_screen *screen = dctx->pipe->screen;
- FILE *f = dd_get_debug_file();
+ FILE *f = dd_get_debug_file(dscreen->verbose);
if (!f)
return NULL;
@@ -602,6 +603,7 @@ static void
dd_after_draw(struct dd_context *dctx, struct dd_call *call)
{
struct dd_screen *dscreen = dd_screen(dctx->base.screen);
+ struct pipe_context *pipe = dctx->pipe;
if (dctx->num_draw_calls >= dscreen->skip_count) {
switch (dscreen->mode) {
@@ -615,6 +617,8 @@ dd_after_draw(struct dd_context *dctx, struct dd_call *call)
}
break;
case DD_DUMP_ALL_CALLS:
+ if (!dscreen->no_flush)
+ pipe->flush(pipe, NULL, 0);
dd_dump_call(dctx, call, 0);
break;
default:
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h b/src/gallium/drivers/ddebug/dd_pipe.h
index a045518dc16..80098dcb644 100644
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -45,6 +45,7 @@ struct dd_screen
unsigned timeout_ms;
enum dd_mode mode;
bool no_flush;
+ bool verbose;
unsigned skip_count;
};
diff --git a/src/gallium/drivers/ddebug/dd_screen.c b/src/gallium/drivers/ddebug/dd_screen.c
index 2716845f58f..3706b2d63f5 100644
--- a/src/gallium/drivers/ddebug/dd_screen.c
+++ b/src/gallium/drivers/ddebug/dd_screen.c
@@ -270,7 +270,7 @@ ddebug_screen_create(struct pipe_screen *screen)
{
struct dd_screen *dscreen;
const char *option = debug_get_option("GALLIUM_DDEBUG", NULL);
- bool dump_always = option && !strcmp(option, "always");
+ bool dump_always = option && !strncmp(option, "always", 6);
bool no_flush = option && strstr(option, "noflush");
bool help = option && !strcmp(option, "help");
unsigned timeout = 0;
@@ -280,15 +280,18 @@ ddebug_screen_create(struct pipe_screen *screen)
puts("");
puts("Usage:");
puts("");
- puts(" GALLIUM_DDEBUG=always");
- puts(" Dump context and driver information after every draw call into");
+ puts(" GALLIUM_DDEBUG=\"always [noflush] [verbose]\"");
+ puts(" Flush and dump context and driver information after every draw call into");
puts(" $HOME/"DD_DIR"/.");
puts("");
- puts(" GALLIUM_DDEBUG=[timeout in ms] noflush");
+ puts(" GALLIUM_DDEBUG=\"[timeout in ms] [noflush] [verbose]\"");
puts(" Flush and detect a device hang after every draw call based on the given");
puts(" fence timeout and dump context and driver information into");
puts(" $HOME/"DD_DIR"/ when a hang is detected.");
- puts(" If 'noflush' is specified, only detect hangs in pipe->flush.");
+ puts("");
+ puts(" If 'noflush' is specified, do not flush on every draw call. In hang");
+ puts(" detection mode, this only detect hangs in pipe->flush.");
+ puts(" If 'verbose' is specified, additional information is written to stderr.");
puts("");
puts(" GALLIUM_DDEBUG_SKIP=[count]");
puts(" Skip flush and hang detection for the given initial number of draw calls.");
@@ -339,6 +342,7 @@ ddebug_screen_create(struct pipe_screen *screen)
dscreen->timeout_ms = timeout;
dscreen->mode = dump_always ? DD_DUMP_ALL_CALLS : DD_DETECT_HANGS;
dscreen->no_flush = no_flush;
+ dscreen->verbose = strstr(option, "verbose") != NULL;
switch (dscreen->mode) {
case DD_DUMP_ALL_CALLS:
diff --git a/src/gallium/drivers/ddebug/dd_util.h b/src/gallium/drivers/ddebug/dd_util.h
index c217c8eed68..093bdff4a92 100644
--- a/src/gallium/drivers/ddebug/dd_util.h
+++ b/src/gallium/drivers/ddebug/dd_util.h
@@ -40,7 +40,7 @@
#define DD_DIR "ddebug_dumps"
static inline FILE *
-dd_get_debug_file()
+dd_get_debug_file(bool verbose)
{
static unsigned index;
char proc_name[128], dir[256], name[512];
@@ -65,6 +65,9 @@ dd_get_debug_file()
return NULL;
}
+ if (verbose)
+ fprintf(stderr, "dd: dumping to file %s\n", name);
+
return f;
}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index e926f56023f..4c066c14cd8 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -705,7 +705,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
if (ret) {
uint32_t *max_compute_units = ret;
- *max_compute_units = rscreen->info.max_compute_units;
+ *max_compute_units = rscreen->info.num_good_compute_units;
}
return sizeof(uint32_t);
@@ -973,7 +973,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
printf("gart_size = %i MB\n", (int)(rscreen->info.gart_size >> 20));
printf("vram_size = %i MB\n", (int)(rscreen->info.vram_size >> 20));
printf("max_sclk = %i\n", rscreen->info.max_sclk);
- printf("max_compute_units = %i\n", rscreen->info.max_compute_units);
+ printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
printf("max_se = %i\n", rscreen->info.max_se);
printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 27f6e983eea..d66e74f9254 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -236,6 +236,7 @@ struct r600_surface {
/* Misc. color flags. */
bool alphatest_bypass;
bool export_16bpc;
+ bool color_is_int8;
/* Color registers. */
unsigned cb_color_info;
@@ -252,6 +253,10 @@ struct r600_surface {
unsigned cb_color_fmask_slice; /* EG and later */
unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
unsigned cb_color_mask; /* R600 only */
+ unsigned spi_shader_col_format; /* SI+, no blending, no alpha-to-coverage. */
+ unsigned spi_shader_col_format_alpha; /* SI+, alpha-to-coverage */
+ unsigned spi_shader_col_format_blend; /* SI+, blending without alpha. */
+ unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */
unsigned sx_ps_downconvert; /* Stoney only */
unsigned sx_blend_opt_epsilon; /* Stoney only */
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index c94f1093ab7..76be37625f3 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1511,12 +1511,14 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev";
+ bld_base->op_actions[TGSI_OPCODE_BREV].intr_name =
+ HAVE_LLVM >= 0x0308 ? "llvm.bitreverse.i32" : "llvm.AMDGPU.brev";
bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
+ bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name =
+ HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." : "llvm.AMDIL.clamp.";
bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
@@ -1539,7 +1541,8 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
+ bld_base->op_actions[TGSI_OPCODE_EX2].intr_name =
+ HAVE_LLVM >= 0x0308 ? "llvm.exp2.f32" : "llvm.AMDIL.exp.";
bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index ad304747eab..2e5caa67d10 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -251,7 +251,7 @@ struct radeon_info {
uint64_t gart_size;
uint64_t vram_size;
uint32_t max_sclk;
- uint32_t max_compute_units;
+ uint32_t num_good_compute_units;
uint32_t max_se;
uint32_t max_sh_per_se;
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 75a9d56d110..a93887ec271 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -680,6 +680,14 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
enum pipe_format format = int_to_norm_format(info->dst.format);
unsigned sample_mask = ~0;
+ /* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and
+ * the format is R16G16. Use R16A16, which does work.
+ */
+ if (format == PIPE_FORMAT_R16G16_UNORM)
+ format = PIPE_FORMAT_R16A16_UNORM;
+ if (format == PIPE_FORMAT_R16G16_SNORM)
+ format = PIPE_FORMAT_R16A16_SNORM;
+
if (info->src.resource->nr_samples > 1 &&
info->dst.resource->nr_samples <= 1 &&
util_max_layer(info->src.resource, 0) == 0 &&
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 5a08cbfb198..6ef6eeec178 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -61,7 +61,7 @@ static void init_scratch_buffer(struct si_context *sctx, struct si_compute *prog
/* Compute the scratch buffer size using the maximum number of waves.
* This way we don't need to recompute it for each kernel launch. */
- unsigned scratch_waves = 32 * sctx->screen->b.info.max_compute_units;
+ unsigned scratch_waves = 32 * sctx->screen->b.info.num_good_compute_units;
for (i = 0; i < program->shader.binary.global_symbol_count; i++) {
unsigned offset =
program->shader.binary.global_symbol_offsets[i];
@@ -402,7 +402,7 @@ static void si_launch_grid(
num_waves_for_scratch =
MIN2(num_waves_for_scratch,
- 32 * sctx->screen->b.info.max_compute_units);
+ 32 * sctx->screen->b.info.num_good_compute_units);
si_pm4_set_reg(pm4, R_00B860_COMPUTE_TMPRING_SIZE,
/* The maximum value for WAVES is 32 * num CU.
* If you program this value incorrectly, the GPU will hang if
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index a07b1c56579..e16ebbdef3e 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -771,7 +771,7 @@ void si_check_vm_faults(struct si_context *sctx)
if (!si_vm_fault_occured(sctx, &addr))
return;
- f = dd_get_debug_file();
+ f = dd_get_debug_file(false);
if (!f)
return;
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index d157a9ffb00..6c796731a18 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -138,6 +138,22 @@ static void si_release_sampler_views(struct si_sampler_views *views)
si_release_descriptors(&views->desc);
}
+static void si_sampler_view_add_buffers(struct si_context *sctx,
+ struct si_sampler_view *rview)
+{
+ if (rview->resource) {
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ rview->resource, RADEON_USAGE_READ,
+ r600_get_sampler_view_priority(rview->resource));
+ }
+
+ if (rview->dcc_buffer && rview->dcc_buffer != rview->resource) {
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ rview->dcc_buffer, RADEON_USAGE_READ,
+ RADEON_PRIO_DCC);
+ }
+}
+
static void si_sampler_views_begin_new_cs(struct si_context *sctx,
struct si_sampler_views *views)
{
@@ -149,12 +165,7 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
struct si_sampler_view *rview =
(struct si_sampler_view*)views->views[i];
- if (!rview->resource)
- continue;
-
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- rview->resource, RADEON_USAGE_READ,
- r600_get_sampler_view_priority(rview->resource));
+ si_sampler_view_add_buffers(sctx, rview);
}
if (!views->desc.buffer)
@@ -176,15 +187,7 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
struct si_sampler_view *rview =
(struct si_sampler_view*)view;
- if (rview->resource)
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- rview->resource, RADEON_USAGE_READ,
- r600_get_sampler_view_priority(rview->resource));
-
- if (rview->dcc_buffer && rview->dcc_buffer != rview->resource)
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- rview->dcc_buffer, RADEON_USAGE_READ,
- RADEON_PRIO_DCC);
+ si_sampler_view_add_buffers(sctx, rview);
pipe_sampler_view_reference(&views->views[slot], view);
memcpy(views->desc.list + slot*8, view_desc, 8*4);
@@ -978,9 +981,11 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, vs_base, true);
si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, vs_base, true);
- /* The TESSEVAL shader needs this for the ESGS ring buffer. */
- si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc,
- R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
+ if (sctx->tes_shader.cso) {
+ /* The TESSEVAL shader needs this for the ESGS ring buffer. */
+ si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc,
+ R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
+ }
} else if (sctx->tes_shader.cso) {
/* The TESSEVAL shader needs this for streamout. */
si_emit_shader_pointer(sctx, &sctx->rw_buffers[PIPE_SHADER_VERTEX].desc,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 3e20c3b81fa..0c1ae90f9da 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -208,7 +208,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
* this for non-cs shaders. Using the wrong value here can result in
* GPU lockups, but the maximum value seems to always work.
*/
- sctx->scratch_waves = 32 * sscreen->b.info.max_compute_units;
+ sctx->scratch_waves = 32 * sscreen->b.info.num_good_compute_units;
#if HAVE_LLVM >= 0x0306
/* Initialize LLVM TargetMachine */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index f83cb024f0e..e2725fe3679 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -125,7 +125,11 @@ struct si_framebuffer {
unsigned log_samples;
unsigned cb0_is_integer;
unsigned compressed_cb_mask;
- unsigned export_16bpc;
+ unsigned spi_shader_col_format;
+ unsigned spi_shader_col_format_alpha;
+ unsigned spi_shader_col_format_blend;
+ unsigned spi_shader_col_format_blend_alpha;
+ unsigned color_is_int8; /* bitmask */
unsigned dirty_cbufs;
bool dirty_zsbuf;
};
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 2de7def8dd2..94c1129c88d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -68,6 +68,7 @@ struct si_shader_context
struct si_shader *shader;
struct si_screen *screen;
unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
+ bool is_gs_copy_shader;
int param_streamout_config;
int param_streamout_write_index;
int param_streamout_offset[4];
@@ -1119,9 +1120,20 @@ static void declare_system_value(
value = get_sample_id(radeon_bld);
break;
- case TGSI_SEMANTIC_SAMPLEPOS:
- value = load_sample_position(radeon_bld, get_sample_id(radeon_bld));
+ case TGSI_SEMANTIC_SAMPLEPOS: {
+ LLVMValueRef pos[4] = {
+ LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_X_FLOAT),
+ LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_Y_FLOAT),
+ lp_build_const_float(gallivm, 0),
+ lp_build_const_float(gallivm, 0)
+ };
+ pos[0] = lp_build_emit_llvm_unary(&radeon_bld->soa.bld_base,
+ TGSI_OPCODE_FRC, pos[0]);
+ pos[1] = lp_build_emit_llvm_unary(&radeon_bld->soa.bld_base,
+ TGSI_OPCODE_FRC, pos[1]);
+ value = lp_build_gather_values(gallivm, pos, 4);
break;
+ }
case TGSI_SEMANTIC_SAMPLEMASK:
/* Smoothing isn't MSAA in GL, but it's MSAA in hardware.
@@ -1255,6 +1267,28 @@ static LLVMValueRef fetch_constant(
return result;
}
+/* Upper 16 bits must be zero. */
+static LLVMValueRef si_llvm_pack_two_int16(struct gallivm_state *gallivm,
+ LLVMValueRef val[2])
+{
+ return LLVMBuildOr(gallivm->builder, val[0],
+ LLVMBuildShl(gallivm->builder, val[1],
+ lp_build_const_int32(gallivm, 16),
+ ""), "");
+}
+
+/* Upper 16 bits are ignored and will be dropped. */
+static LLVMValueRef si_llvm_pack_two_int32_as_int16(struct gallivm_state *gallivm,
+ LLVMValueRef val[2])
+{
+ LLVMValueRef v[2] = {
+ LLVMBuildAnd(gallivm->builder, val[0],
+ lp_build_const_int32(gallivm, 0xffff), ""),
+ val[1],
+ };
+ return si_llvm_pack_two_int16(gallivm, v);
+}
+
/* Initialize arguments for the shader export intrinsic */
static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
LLVMValueRef *values,
@@ -1265,16 +1299,15 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
struct lp_build_context *uint =
&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
struct lp_build_context *base = &bld_base->base;
- unsigned compressed = 0;
+ struct gallivm_state *gallivm = base->gallivm;
+ LLVMBuilderRef builder = base->gallivm->builder;
+ LLVMValueRef val[4];
+ unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
unsigned chan;
+ bool is_int8;
- /* XXX: This controls which components of the output
- * registers actually get exported. (e.g bit 0 means export
- * X component, bit 1 means export Y component, etc.) I'm
- * hard coding this to 0xf for now. In the future, we might
- * want to do something else.
- */
- args[0] = lp_build_const_int32(base->gallivm, 0xf);
+ /* Default is 0xf. Adjusted below depending on the format. */
+ args[0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */
/* Specify whether the EXEC mask represents the valid mask */
args[1] = uint->zero;
@@ -1286,17 +1319,47 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
args[3] = lp_build_const_int32(base->gallivm, target);
if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+ const union si_shader_key *key = &si_shader_ctx->shader->key;
+ unsigned col_formats = key->ps.spi_shader_col_format;
int cbuf = target - V_008DFC_SQ_EXP_MRT;
- if (cbuf >= 0 && cbuf < 8)
- compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1;
+ assert(cbuf >= 0 && cbuf < 8);
+ spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
+ is_int8 = (key->ps.color_is_int8 >> cbuf) & 0x1;
}
- /* Set COMPR flag */
- args[4] = compressed ? uint->one : uint->zero;
+ args[4] = uint->zero; /* COMPR flag */
+ args[5] = base->undef;
+ args[6] = base->undef;
+ args[7] = base->undef;
+ args[8] = base->undef;
+
+ switch (spi_shader_col_format) {
+ case V_028714_SPI_SHADER_ZERO:
+ args[0] = uint->zero; /* writemask */
+ args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL);
+ break;
+
+ case V_028714_SPI_SHADER_32_R:
+ args[0] = uint->one; /* writemask */
+ args[5] = values[0];
+ break;
+
+ case V_028714_SPI_SHADER_32_GR:
+ args[0] = lp_build_const_int32(base->gallivm, 0x3); /* writemask */
+ args[5] = values[0];
+ args[6] = values[1];
+ break;
+
+ case V_028714_SPI_SHADER_32_AR:
+ args[0] = lp_build_const_int32(base->gallivm, 0x9); /* writemask */
+ args[5] = values[0];
+ args[8] = values[3];
+ break;
+
+ case V_028714_SPI_SHADER_FP16_ABGR:
+ args[4] = uint->one; /* COMPR flag */
- if (compressed) {
- /* Pixel shader needs to pack output values before export */
for (chan = 0; chan < 2; chan++) {
LLVMValueRef pack_args[2] = {
values[2 * chan],
@@ -1306,18 +1369,107 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
packed = lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.packf16",
- LLVMInt32TypeInContext(base->gallivm->context),
- pack_args, 2,
+ uint->elem_type, pack_args, 2,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
args[chan + 5] =
LLVMBuildBitCast(base->gallivm->builder,
- packed,
- LLVMFloatTypeInContext(base->gallivm->context),
- "");
- args[chan + 7] = base->undef;
+ packed, base->elem_type, "");
}
- } else
+ break;
+
+ case V_028714_SPI_SHADER_UNORM16_ABGR:
+ for (chan = 0; chan < 4; chan++) {
+ val[chan] = radeon_llvm_saturate(bld_base, values[chan]);
+ val[chan] = LLVMBuildFMul(builder, val[chan],
+ lp_build_const_float(gallivm, 65535), "");
+ val[chan] = LLVMBuildFAdd(builder, val[chan],
+ lp_build_const_float(gallivm, 0.5), "");
+ val[chan] = LLVMBuildFPToUI(builder, val[chan],
+ uint->elem_type, "");
+ }
+
+ args[4] = uint->one; /* COMPR flag */
+ args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT,
+ si_llvm_pack_two_int16(gallivm, val));
+ args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT,
+ si_llvm_pack_two_int16(gallivm, val+2));
+ break;
+
+ case V_028714_SPI_SHADER_SNORM16_ABGR:
+ for (chan = 0; chan < 4; chan++) {
+ /* Clamp between [-1, 1]. */
+ val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MIN,
+ values[chan],
+ lp_build_const_float(gallivm, 1));
+ val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
+ val[chan],
+ lp_build_const_float(gallivm, -1));
+ /* Convert to a signed integer in [-32767, 32767]. */
+ val[chan] = LLVMBuildFMul(builder, val[chan],
+ lp_build_const_float(gallivm, 32767), "");
+ /* If positive, add 0.5, else add -0.5. */
+ val[chan] = LLVMBuildFAdd(builder, val[chan],
+ LLVMBuildSelect(builder,
+ LLVMBuildFCmp(builder, LLVMRealOGE,
+ val[chan], base->zero, ""),
+ lp_build_const_float(gallivm, 0.5),
+ lp_build_const_float(gallivm, -0.5), ""), "");
+ val[chan] = LLVMBuildFPToSI(builder, val[chan], uint->elem_type, "");
+ }
+
+ args[4] = uint->one; /* COMPR flag */
+ args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT,
+ si_llvm_pack_two_int32_as_int16(gallivm, val));
+ args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT,
+ si_llvm_pack_two_int32_as_int16(gallivm, val+2));
+ break;
+
+ case V_028714_SPI_SHADER_UINT16_ABGR: {
+ LLVMValueRef max = lp_build_const_int32(gallivm, is_int8 ?
+ 255 : 65535);
+ /* Clamp. */
+ for (chan = 0; chan < 4; chan++) {
+ val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, values[chan]);
+ val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_UMIN,
+ val[chan], max);
+ }
+
+ args[4] = uint->one; /* COMPR flag */
+ args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT,
+ si_llvm_pack_two_int16(gallivm, val));
+ args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT,
+ si_llvm_pack_two_int16(gallivm, val+2));
+ break;
+ }
+
+ case V_028714_SPI_SHADER_SINT16_ABGR: {
+ LLVMValueRef max = lp_build_const_int32(gallivm, is_int8 ?
+ 127 : 32767);
+ LLVMValueRef min = lp_build_const_int32(gallivm, is_int8 ?
+ -128 : -32768);
+ /* Clamp. */
+ for (chan = 0; chan < 4; chan++) {
+ val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, values[chan]);
+ val[chan] = lp_build_emit_llvm_binary(bld_base,
+ TGSI_OPCODE_IMIN,
+ val[chan], max);
+ val[chan] = lp_build_emit_llvm_binary(bld_base,
+ TGSI_OPCODE_IMAX,
+ val[chan], min);
+ }
+
+ args[4] = uint->one; /* COMPR flag */
+ args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT,
+ si_llvm_pack_two_int32_as_int16(gallivm, val));
+ args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT,
+ si_llvm_pack_two_int32_as_int16(gallivm, val+2));
+ break;
+ }
+
+ case V_028714_SPI_SHADER_32_ABGR:
memcpy(&args[5], values, sizeof(values[0]) * 4);
+ break;
+ }
}
static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
@@ -2000,6 +2152,8 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context * bld_base)
struct si_shader_output_values *outputs = NULL;
int i,j;
+ assert(!si_shader_ctx->is_gs_copy_shader);
+
outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
/* Vertex color clamping.
@@ -2008,8 +2162,7 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context * bld_base)
* an IF statement is added that clamps all colors if the constant
* is true.
*/
- if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
- !si_shader_ctx->shader->is_gs_copy_shader) {
+ if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
struct lp_build_if_state if_ctx;
LLVMValueRef cond = NULL;
LLVMValueRef addr, val;
@@ -3312,7 +3465,9 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
- lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.barrier.local",
+ lp_build_intrinsic(gallivm->builder,
+ HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.s.barrier"
+ : "llvm.AMDGPU.barrier.local",
LLVMVoidTypeInContext(gallivm->context), NULL, 0,
LLVMNoUnwindAttribute);
}
@@ -3403,7 +3558,7 @@ static void create_function(struct si_shader_context *si_shader_ctx)
params[SI_PARAM_LS_OUT_LAYOUT] = i32;
num_params = SI_PARAM_LS_OUT_LAYOUT+1;
} else {
- if (shader->is_gs_copy_shader) {
+ if (si_shader_ctx->is_gs_copy_shader) {
last_array_pointer = SI_PARAM_CONST_BUFFERS;
num_params = SI_PARAM_CONST_BUFFERS+1;
} else {
@@ -3676,7 +3831,7 @@ static void preload_ring_buffers(struct si_shader_context *si_shader_ctx)
build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
}
- if (si_shader_ctx->shader->is_gs_copy_shader) {
+ if (si_shader_ctx->is_gs_copy_shader) {
LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
si_shader_ctx->gsvs_ring[0] =
@@ -3850,22 +4005,65 @@ static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary
static void si_shader_dump_stats(struct si_screen *sscreen,
struct si_shader_config *conf,
+ unsigned num_inputs,
unsigned code_size,
struct pipe_debug_callback *debug,
unsigned processor)
{
+ unsigned lds_increment = sscreen->b.chip_class >= CIK ? 512 : 256;
+ unsigned lds_per_wave = 0;
+ unsigned max_simd_waves = 10;
+
+ /* Compute LDS usage for PS. */
+ if (processor == TGSI_PROCESSOR_FRAGMENT) {
+ /* The minimum usage per wave is (num_inputs * 36). The maximum
+ * usage is (num_inputs * 36 * 16).
+ * We can get anything in between and it varies between waves.
+ *
+ * Other stages don't know the size at compile time or don't
+ * allocate LDS per wave, but instead they do it per thread group.
+ */
+ lds_per_wave = conf->lds_size * lds_increment +
+ align(num_inputs * 36, lds_increment);
+ }
+
+ /* Compute the per-SIMD wave counts. */
+ if (conf->num_sgprs) {
+ if (sscreen->b.chip_class >= VI)
+ max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
+ else
+ max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
+ }
+
+ if (conf->num_vgprs)
+ max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
+
+ /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
+ * that PS can use.
+ */
+ if (lds_per_wave)
+ max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
+
if (r600_can_dump_shader(&sscreen->b, processor)) {
fprintf(stderr, "*** SHADER STATS ***\n"
- "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n"
- "Scratch: %d bytes per wave\n********************\n",
+ "SGPRS: %d\n"
+ "VGPRS: %d\n"
+ "Code Size: %d bytes\n"
+ "LDS: %d blocks\n"
+ "Scratch: %d bytes per wave\n"
+ "Max Waves: %d\n"
+ "********************\n",
conf->num_sgprs, conf->num_vgprs, code_size,
- conf->lds_size, conf->scratch_bytes_per_wave);
+ conf->lds_size, conf->scratch_bytes_per_wave,
+ max_simd_waves);
}
pipe_debug_message(debug, SHADER_INFO,
- "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d LDS: %d Scratch: %d",
+ "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "
+ "LDS: %d Scratch: %d Max Waves: %d",
conf->num_sgprs, conf->num_vgprs, code_size,
- conf->lds_size, conf->scratch_bytes_per_wave);
+ conf->lds_size, conf->scratch_bytes_per_wave,
+ max_simd_waves);
}
void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
@@ -3876,6 +4074,7 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
si_shader_dump_disassembly(&shader->binary, debug);
si_shader_dump_stats(sscreen, &shader->config,
+ shader->selector->info.num_inputs,
shader->binary.code_size, debug, processor);
}
@@ -3924,7 +4123,6 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
struct lp_build_context *base = &bld_base->base;
struct lp_build_context *uint = &bld_base->uint_bld;
- struct si_shader *shader = si_shader_ctx->shader;
struct si_shader_output_values *outputs;
struct tgsi_shader_info *gsinfo = &gs->selector->info;
LLVMValueRef args[9];
@@ -3933,7 +4131,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));
si_shader_ctx->type = TGSI_PROCESSOR_VERTEX;
- shader->is_gs_copy_shader = true;
+ si_shader_ctx->is_gs_copy_shader = true;
radeon_llvm_context_init(&si_shader_ctx->radeon_bld);
@@ -4031,7 +4229,7 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
break;
case PIPE_SHADER_FRAGMENT:
- fprintf(f, " export_16bpc = 0x%X\n", key->ps.export_16bpc);
+ fprintf(f, " spi_shader_col_format = 0x%x\n", key->ps.spi_shader_col_format);
fprintf(f, " last_cbuf = %u\n", key->ps.last_cbuf);
fprintf(f, " color_two_side = %u\n", key->ps.color_two_side);
fprintf(f, " alpha_func = %u\n", key->ps.alpha_func);
@@ -4208,7 +4406,6 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
shader->gs_copy_shader = CALLOC_STRUCT(si_shader);
shader->gs_copy_shader->selector = shader->selector;
- shader->gs_copy_shader->key = shader->key;
si_shader_ctx.shader = shader->gs_copy_shader;
if ((r = si_generate_gs_copy_shader(sscreen, &si_shader_ctx,
shader, dump, debug))) {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 1635358d505..c1512078a18 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -213,6 +213,10 @@ struct si_shader_selector {
/* PS parameters. */
unsigned db_shader_control;
+ /* Set 0xf or 0x0 (4 bits) per each written output.
+ * ANDed with spi_shader_col_format.
+ */
+ unsigned colors_written_4bit;
/* masks of "get_unique_index" bits */
uint64_t outputs_written;
@@ -232,7 +236,8 @@ struct si_shader_selector {
union si_shader_key {
struct {
- unsigned export_16bpc:8;
+ unsigned spi_shader_col_format;
+ unsigned color_is_int8:8;
unsigned last_cbuf:3;
unsigned color_two_side:1;
unsigned alpha_func:3;
@@ -292,7 +297,6 @@ struct si_shader {
bool uses_instanceid;
unsigned nr_pos_exports;
unsigned nr_param_exports;
- bool is_gs_copy_shader;
bool dx10_clamp_mode; /* convert NaNs to 0 */
};
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 2a6d2c6ff36..9e0ccfc5dde 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -403,6 +403,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
if (!blend)
return NULL;
+ blend->alpha_to_coverage = state->alpha_to_coverage;
blend->alpha_to_one = state->alpha_to_one;
blend->dual_src_blend = util_blend_state_is_dual(state, 0);
@@ -419,6 +420,9 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
S_028B70_ALPHA_TO_MASK_OFFSET3(2));
+ if (state->alpha_to_coverage)
+ blend->need_src_alpha_4bit |= 0xf;
+
blend->cb_target_mask = 0;
for (int i = 0; i < 8; i++) {
/* state->rt entries > 0 only written if independent blending */
@@ -433,6 +437,9 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
unsigned blend_cntl = 0;
+ if (!state->rt[j].colormask)
+ continue;
+
/* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */
blend->cb_target_mask |= state->rt[j].colormask << (4 * i);
@@ -453,6 +460,17 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
}
si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
+
+ blend->blend_enable_4bit |= 0xf << (i * 4);
+
+ /* This is only important for formats without alpha. */
+ if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
+ blend->need_src_alpha_4bit |= 0xf << (i * 4);
}
if (blend->cb_target_mask) {
@@ -1266,53 +1284,6 @@ static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
}
}
-/* Returns the size in bits of the widest component of a CB format */
-static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
-{
- switch(colorformat) {
- case V_028C70_COLOR_4_4_4_4:
- return 4;
-
- case V_028C70_COLOR_1_5_5_5:
- case V_028C70_COLOR_5_5_5_1:
- return 5;
-
- case V_028C70_COLOR_5_6_5:
- return 6;
-
- case V_028C70_COLOR_8:
- case V_028C70_COLOR_8_8:
- case V_028C70_COLOR_8_8_8_8:
- return 8;
-
- case V_028C70_COLOR_10_10_10_2:
- case V_028C70_COLOR_2_10_10_10:
- return 10;
-
- case V_028C70_COLOR_10_11_11:
- case V_028C70_COLOR_11_11_10:
- return 11;
-
- case V_028C70_COLOR_16:
- case V_028C70_COLOR_16_16:
- case V_028C70_COLOR_16_16_16_16:
- return 16;
-
- case V_028C70_COLOR_8_24:
- case V_028C70_COLOR_24_8:
- return 24;
-
- case V_028C70_COLOR_32:
- case V_028C70_COLOR_32_32:
- case V_028C70_COLOR_32_32_32_32:
- case V_028C70_COLOR_X24_8_32_FLOAT:
- return 32;
- }
-
- assert(!"Unknown maximum component size");
- return 0;
-}
-
static uint32_t si_translate_dbformat(enum pipe_format format)
{
switch (format) {
@@ -1405,6 +1376,30 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
}
}
+ if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
+ sscreen->b.family >= CHIP_STONEY) {
+ switch (format) {
+ case PIPE_FORMAT_ETC1_RGB8:
+ case PIPE_FORMAT_ETC2_RGB8:
+ case PIPE_FORMAT_ETC2_SRGB8:
+ return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
+ case PIPE_FORMAT_ETC2_RGB8A1:
+ case PIPE_FORMAT_ETC2_SRGB8A1:
+ return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
+ case PIPE_FORMAT_ETC2_RGBA8:
+ case PIPE_FORMAT_ETC2_SRGBA8:
+ return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
+ case PIPE_FORMAT_ETC2_R11_UNORM:
+ case PIPE_FORMAT_ETC2_R11_SNORM:
+ return V_008F14_IMG_DATA_FORMAT_ETC2_R;
+ case PIPE_FORMAT_ETC2_RG11_UNORM:
+ case PIPE_FORMAT_ETC2_RG11_SNORM:
+ return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
+ default:
+ goto out_unknown;
+ }
+ }
+
if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
if (!enable_compressed_formats)
goto out_unknown;
@@ -1880,6 +1875,123 @@ unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool sten
* framebuffer handling
*/
+/**
+ * Pick the SPI color export formats for \p surf and store them in its four
+ * spi_shader_col_format* fields: the plain one, and the variants to use when
+ * alpha export is needed, when blending is enabled, and when both apply.
+ *
+ * \param surf     surface whose spi_shader_col_format* fields are written
+ * \param format   CB color format (V_028C70_COLOR_*)
+ * \param swap     CB component swap (V_028C70_SWAP_*)
+ * \param ntype    CB number type (V_028C70_NUMBER_*)
+ * \param is_depth if true, all four formats are forced to 32_ABGR
+ *
+ * For a format not handled by the switch, the function asserts and returns
+ * without writing to \p surf.
+ */
+static void si_choose_spi_color_formats(struct r600_surface *surf,
+ unsigned format, unsigned swap,
+ unsigned ntype, bool is_depth)
+{
+ /* Alpha is needed for alpha-to-coverage.
+ * Blending may be with or without alpha.
+ */
+ unsigned normal = 0; /* most optimal, may not support blending or export alpha */
+ unsigned alpha = 0; /* exports alpha, but may not support blending */
+ unsigned blend = 0; /* supports blending, but may not export alpha */
+ unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */
+
+ /* Choose the SPI color formats. These are required values for Stoney/RB+.
+ * Other chips have multiple choices, though they are not necessarily better.
+ */
+ switch (format) {
+ case V_028C70_COLOR_5_6_5:
+ case V_028C70_COLOR_1_5_5_5:
+ case V_028C70_COLOR_5_5_5_1:
+ case V_028C70_COLOR_4_4_4_4:
+ case V_028C70_COLOR_10_11_11:
+ case V_028C70_COLOR_11_11_10:
+ case V_028C70_COLOR_8:
+ case V_028C70_COLOR_8_8:
+ case V_028C70_COLOR_8_8_8_8:
+ case V_028C70_COLOR_10_10_10_2:
+ case V_028C70_COLOR_2_10_10_10:
+ if (ntype == V_028C70_NUMBER_UINT)
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
+ else if (ntype == V_028C70_NUMBER_SINT)
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
+ else
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
+ break;
+
+ case V_028C70_COLOR_16:
+ case V_028C70_COLOR_16_16:
+ case V_028C70_COLOR_16_16_16_16:
+ if (ntype == V_028C70_NUMBER_UNORM ||
+ ntype == V_028C70_NUMBER_SNORM) {
+ /* UNORM16 and SNORM16 don't support blending */
+ if (ntype == V_028C70_NUMBER_UNORM)
+ normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
+ else
+ normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;
+
+ /* Use 32 bits per channel for blending. */
+ if (format == V_028C70_COLOR_16) {
+ if (swap == V_028C70_SWAP_STD) { /* R */
+ blend = V_028714_SPI_SHADER_32_R;
+ blend_alpha = V_028714_SPI_SHADER_32_AR;
+ } else if (swap == V_028C70_SWAP_ALT_REV) /* A */
+ blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
+ else
+ assert(0);
+ } else if (format == V_028C70_COLOR_16_16) {
+ if (swap == V_028C70_SWAP_STD) { /* RG */
+ blend = V_028714_SPI_SHADER_32_GR;
+ blend_alpha = V_028714_SPI_SHADER_32_ABGR;
+ } else if (swap == V_028C70_SWAP_ALT) /* RA */
+ blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
+ else
+ assert(0);
+ } else /* 16_16_16_16 */
+ blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
+ } else if (ntype == V_028C70_NUMBER_UINT)
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
+ else if (ntype == V_028C70_NUMBER_SINT)
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
+ else if (ntype == V_028C70_NUMBER_FLOAT)
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
+ else
+ assert(0);
+ break;
+
+ case V_028C70_COLOR_32:
+ if (swap == V_028C70_SWAP_STD) { /* R */
+ blend = normal = V_028714_SPI_SHADER_32_R;
+ alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
+ } else if (swap == V_028C70_SWAP_ALT_REV) /* A */
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
+ else
+ assert(0);
+ break;
+
+ case V_028C70_COLOR_32_32:
+ if (swap == V_028C70_SWAP_STD) { /* RG */
+ blend = normal = V_028714_SPI_SHADER_32_GR;
+ alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
+ } else if (swap == V_028C70_SWAP_ALT) /* RA */
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
+ else
+ assert(0);
+ break;
+
+ case V_028C70_COLOR_32_32_32_32:
+ case V_028C70_COLOR_8_24:
+ case V_028C70_COLOR_24_8:
+ case V_028C70_COLOR_X24_8_32_FLOAT:
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
+ break;
+
+ default:
+ assert(0);
+ return;
+ }
+
+ /* The DB->CB copy needs 32_ABGR. */
+ if (is_depth)
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
+
+ surf->spi_shader_col_format = normal;
+ surf->spi_shader_col_format_alpha = alpha;
+ surf->spi_shader_col_format_blend = blend;
+ surf->spi_shader_col_format_blend_alpha = blend_alpha;
+}
+
static void si_initialize_color_surface(struct si_context *sctx,
struct r600_surface *surf)
{
@@ -1893,7 +2005,6 @@ static void si_initialize_color_surface(struct si_context *sctx,
const struct util_format_description *desc;
int i;
unsigned blend_clamp = 0, blend_bypass = 0;
- unsigned max_comp_size;
/* Layered rendering doesn't work with LINEAR_GENERAL.
* (LINEAR_ALIGNED and others work) */
@@ -1971,6 +2082,12 @@ static void si_initialize_color_surface(struct si_context *sctx,
blend_bypass = 1;
}
+ if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
+ (format == V_028C70_COLOR_8 ||
+ format == V_028C70_COLOR_8_8 ||
+ format == V_028C70_COLOR_8_8_8_8))
+ surf->color_is_int8 = true;
+
color_info = S_028C70_FORMAT(format) |
S_028C70_COMP_SWAP(swap) |
S_028C70_BLEND_CLAMP(blend_clamp) |
@@ -2050,13 +2167,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
}
/* Determine pixel shader export format */
- max_comp_size = si_colorformat_max_comp_size(format);
- if (ntype == V_028C70_NUMBER_SRGB ||
- ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
- max_comp_size <= 10) ||
- (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
- surf->export_16bpc = true;
- }
+ si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);
if (sctx->b.family == CHIP_STONEY &&
!(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
@@ -2283,7 +2394,12 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
util_copy_framebuffer_state(&sctx->framebuffer.state, state);
- sctx->framebuffer.export_16bpc = 0;
+ sctx->framebuffer.spi_shader_col_format = 0;
+ sctx->framebuffer.spi_shader_col_format_alpha = 0;
+ sctx->framebuffer.spi_shader_col_format_blend = 0;
+ sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
+ sctx->framebuffer.color_is_int8 = 0;
+
sctx->framebuffer.compressed_cb_mask = 0;
sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
@@ -2304,22 +2420,35 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
si_initialize_color_surface(sctx, surf);
}
- if (surf->export_16bpc) {
- sctx->framebuffer.export_16bpc |= 1 << i;
- }
+ sctx->framebuffer.spi_shader_col_format |=
+ surf->spi_shader_col_format << (i * 4);
+ sctx->framebuffer.spi_shader_col_format_alpha |=
+ surf->spi_shader_col_format_alpha << (i * 4);
+ sctx->framebuffer.spi_shader_col_format_blend |=
+ surf->spi_shader_col_format_blend << (i * 4);
+ sctx->framebuffer.spi_shader_col_format_blend_alpha |=
+ surf->spi_shader_col_format_blend_alpha << (i * 4);
+
+ if (surf->color_is_int8)
+ sctx->framebuffer.color_is_int8 |= 1 << i;
if (rtex->fmask.size && rtex->cmask.size) {
sctx->framebuffer.compressed_cb_mask |= 1 << i;
}
r600_context_add_resource_size(ctx, surf->base.texture);
}
- /* Set the 16BPC export for possible dual-src blending. */
- if (i == 1 && surf && surf->export_16bpc) {
- sctx->framebuffer.export_16bpc |= 1 << 1;
+ /* Set the second SPI format for possible dual-src blending. */
+ if (i == 1 && surf) {
+ sctx->framebuffer.spi_shader_col_format |=
+ surf->spi_shader_col_format << (i * 4);
+ sctx->framebuffer.spi_shader_col_format_alpha |=
+ surf->spi_shader_col_format_alpha << (i * 4);
+ sctx->framebuffer.spi_shader_col_format_blend |=
+ surf->spi_shader_col_format_blend << (i * 4);
+ sctx->framebuffer.spi_shader_col_format_blend_alpha |=
+ surf->spi_shader_col_format_blend_alpha << (i * 4);
}
- assert(!(sctx->framebuffer.export_16bpc & ~0xff));
-
if (state->zsbuf) {
surf = (struct r600_surface*)state->zsbuf;
@@ -2703,12 +2832,17 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
case PIPE_FORMAT_DXT3_SRGBA:
case PIPE_FORMAT_DXT5_SRGBA:
case PIPE_FORMAT_BPTC_SRGBA:
+ case PIPE_FORMAT_ETC2_SRGB8:
+ case PIPE_FORMAT_ETC2_SRGB8A1:
+ case PIPE_FORMAT_ETC2_SRGBA8:
num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
break;
case PIPE_FORMAT_RGTC1_SNORM:
case PIPE_FORMAT_LATC1_SNORM:
case PIPE_FORMAT_RGTC2_SNORM:
case PIPE_FORMAT_LATC2_SNORM:
+ case PIPE_FORMAT_ETC2_R11_SNORM:
+ case PIPE_FORMAT_ETC2_RG11_SNORM:
/* implies float, so use SNORM/UNORM to determine
whether data is signed or not */
case PIPE_FORMAT_BPTC_RGB_FLOAT:
@@ -3596,12 +3730,32 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
if (sctx->b.chip_class >= CIK) {
- si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffc));
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
- si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xfffe));
+ si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
- si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
- si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0));
+
+ if (sscreen->b.info.num_good_compute_units /
+ (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) {
+ /* Too few available compute units per SH. Disallowing
+ * VS to run on CU0 could hurt us more than late VS
+ * allocation would help.
+ *
+ * LATE_ALLOC_VS = 2 is the highest safe number.
+ */
+ si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
+ si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
+ si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
+ } else {
+ /* Set LATE_ALLOC_VS == 31. It should be less than
+ * the number of scratch waves. Limitations:
+ * - VS can't execute on CU0.
+ * - If HS writes outputs to LDS, LS can't execute on CU0.
+ */
+ si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe));
+ si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
+ si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
+ }
+
si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
}
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index f5ca661f8d7..be3488e6dba 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -39,8 +39,14 @@ struct si_shader;
struct si_state_blend {
struct si_pm4_state pm4;
uint32_t cb_target_mask;
+ bool alpha_to_coverage;
bool alpha_to_one;
bool dual_src_blend;
+ /* 4-bit masks per render target: each nibble is set to 0xf or 0x0
+ * depending on whether the condition named by the field holds for
+ * that target. ANDed with spi_shader_col_format.
+ */
+ unsigned blend_enable_4bit;
+ unsigned need_src_alpha_4bit;
};
struct si_state_rasterizer {
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 8ff70b44d45..36174eb5a94 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -212,13 +212,37 @@ static void si_shader_es(struct si_shader *shader)
si_set_tesseval_regs(shader, pm4);
}
+/**
+ * Build the VGT_GS_MODE register value to use when \p shader is a geometry
+ * shader.
+ *
+ * The cut mode is the smallest of 128/256/512/1024 that covers the
+ * selector's gs_max_out_vertices.
+ */
+static uint32_t si_vgt_gs_mode(struct si_shader *shader)
+{
+	unsigned max_out_vertices = shader->selector->gs_max_out_vertices;
+	/* Largest cut mode unless one of the smaller limits fits. */
+	unsigned cut_mode = V_028A40_GS_CUT_1024;
+
+	if (max_out_vertices <= 128)
+		cut_mode = V_028A40_GS_CUT_128;
+	else if (max_out_vertices <= 256)
+		cut_mode = V_028A40_GS_CUT_256;
+	else if (max_out_vertices <= 512)
+		cut_mode = V_028A40_GS_CUT_512;
+	else
+		assert(max_out_vertices <= 1024);
+
+	return S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
+	       S_028A40_CUT_MODE(cut_mode)|
+	       S_028A40_ES_WRITE_OPTIMIZE(1) |
+	       S_028A40_GS_WRITE_OPTIMIZE(1);
+}
+
static void si_shader_gs(struct si_shader *shader)
{
unsigned gs_vert_itemsize = shader->selector->gsvs_vertex_size;
- unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2;
unsigned gs_num_invocations = shader->selector->gs_num_invocations;
- unsigned cut_mode;
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
uint64_t va;
@@ -232,22 +256,7 @@ static void si_shader_gs(struct si_shader *shader)
if (!pm4)
return;
- if (gs_max_vert_out <= 128) {
- cut_mode = V_028A40_GS_CUT_128;
- } else if (gs_max_vert_out <= 256) {
- cut_mode = V_028A40_GS_CUT_256;
- } else if (gs_max_vert_out <= 512) {
- cut_mode = V_028A40_GS_CUT_512;
- } else {
- assert(gs_max_vert_out <= 1024);
- cut_mode = V_028A40_GS_CUT_1024;
- }
-
- si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE,
- S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
- S_028A40_CUT_MODE(cut_mode)|
- S_028A40_ES_WRITE_OPTIMIZE(1) |
- S_028A40_GS_WRITE_OPTIMIZE(1));
+ si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, si_vgt_gs_mode(shader));
si_pm4_set_reg(pm4, R_028A60_VGT_GSVS_RING_OFFSET_1, gsvs_itemsize);
si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize * ((max_stream >= 2) ? 2 : 1));
@@ -255,7 +264,7 @@ static void si_shader_gs(struct si_shader *shader)
si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize * (max_stream + 1));
- si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
+ si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, shader->selector->gs_max_out_vertices);
si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize >> 2);
si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, (max_stream >= 1) ? gs_vert_itemsize >> 2 : 0);
@@ -289,7 +298,14 @@ static void si_shader_gs(struct si_shader *shader)
S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
}
-static void si_shader_vs(struct si_shader *shader)
+/**
+ * Compute the state for \p shader, which will run as a vertex shader on the
+ * hardware.
+ *
+ * If \p gs is non-NULL, it points to the geometry shader for which this shader
+ * is the copy shader.
+ */
+static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
{
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
@@ -304,20 +320,26 @@ static void si_shader_vs(struct si_shader *shader)
if (!pm4)
return;
- /* If this is the GS copy shader, the GS state writes this register.
- * Otherwise, the VS state writes it.
+ /* We always write VGT_GS_MODE in the VS state, because every switch
+ * between different shader pipelines involving a different GS or no
+ * GS at all involves a switch of the VS (different GS use different
+ * copy shaders). On the other hand, when the API switches from a GS to
+ * no GS and then back to the same GS used originally, the GS state is
+ * not sent again.
*/
- if (!shader->is_gs_copy_shader) {
+ if (!gs) {
si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE,
S_028A40_MODE(enable_prim_id ? V_028A40_GS_SCENARIO_A : 0));
si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, enable_prim_id);
- } else
+ } else {
+ si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, si_vgt_gs_mode(gs));
si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0);
+ }
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
- if (shader->is_gs_copy_shader) {
+ if (gs) {
vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_VERTEX) {
@@ -382,13 +404,58 @@ static void si_shader_vs(struct si_shader *shader)
si_set_tesseval_regs(shader, pm4);
}
+/**
+ * Return the SPI_SHADER_COL_FORMAT value for \p shader, derived from the
+ * spi_shader_col_format field of its PS key (4 bits per color target).
+ *
+ * Any target below the highest one in use whose format is zero is given
+ * SPI_SHADER_32_R instead.
+ */
+static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
+{
+	unsigned value = shader->key.ps.spi_shader_col_format;
+	unsigned i, num_targets = (util_last_bit(value) + 3) / 4;
+
+	/* If the i-th target format is set, all previous target formats must
+	 * be non-zero to avoid hangs.
+	 */
+	for (i = 0; i < num_targets; i++) {
+		/* Shift unsigned operands: a signed 0xf << 28 (i == 7) does
+		 * not fit in int and would be undefined behaviour.
+		 */
+		if (!(value & (0xfu << (i * 4))))
+			value |= (unsigned)V_028714_SPI_SHADER_32_R << (i * 4);
+	}
+
+	return value;
+}
+
+/**
+ * Derive the CB_SHADER_MASK value from \p spi_shader_col_format.
+ *
+ * Both values hold 4 bits per color target (8 targets). For each target the
+ * export format is mapped to a mask: ZERO -> 0x0, 32_R -> 0x1, 32_GR -> 0x3,
+ * 32_AR -> 0x9, and every *_ABGR format -> 0xf.
+ *
+ * \return the combined mask for all 8 targets
+ */
+static unsigned si_get_cb_shader_mask(unsigned spi_shader_col_format)
+{
+	unsigned i, cb_shader_mask = 0;
+
+	for (i = 0; i < 8; i++) {
+		/* The mask constants are unsigned so that shifting them into
+		 * bits 28..31 (i == 7) cannot overflow a signed int.
+		 */
+		switch ((spi_shader_col_format >> (i * 4)) & 0xf) {
+		case V_028714_SPI_SHADER_ZERO:
+			break;
+		case V_028714_SPI_SHADER_32_R:
+			cb_shader_mask |= 0x1u << (i * 4);
+			break;
+		case V_028714_SPI_SHADER_32_GR:
+			cb_shader_mask |= 0x3u << (i * 4);
+			break;
+		case V_028714_SPI_SHADER_32_AR:
+			cb_shader_mask |= 0x9u << (i * 4);
+			break;
+		case V_028714_SPI_SHADER_FP16_ABGR:
+		case V_028714_SPI_SHADER_UNORM16_ABGR:
+		case V_028714_SPI_SHADER_SNORM16_ABGR:
+		case V_028714_SPI_SHADER_UINT16_ABGR:
+		case V_028714_SPI_SHADER_SINT16_ABGR:
+		case V_028714_SPI_SHADER_32_ABGR:
+			cb_shader_mask |= 0xfu << (i * 4);
+			break;
+		default:
+			assert(0);
+		}
+	}
+	return cb_shader_mask;
+}
+
static void si_shader_ps(struct si_shader *shader)
{
struct tgsi_shader_info *info = &shader->selector->info;
struct si_pm4_state *pm4;
- unsigned i, spi_ps_in_control;
- unsigned spi_shader_col_format = 0, cb_shader_mask = 0;
- unsigned colors_written, export_16bpc;
+ unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
unsigned num_sgprs, num_user_sgprs;
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
uint64_t va;
@@ -423,23 +490,18 @@ static void si_shader_ps(struct si_shader *shader)
TGSI_FS_COORD_PIXEL_CENTER_INTEGER)
spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1);
- /* Find out what SPI_SHADER_COL_FORMAT and CB_SHADER_MASK should be. */
- colors_written = info->colors_written;
- export_16bpc = shader->key.ps.export_16bpc;
+ spi_shader_col_format = si_get_spi_shader_col_format(shader);
+ cb_shader_mask = si_get_cb_shader_mask(spi_shader_col_format);
- if (info->colors_written == 0x1 &&
- info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
- colors_written |= (1 << (shader->key.ps.last_cbuf + 1)) - 1;
- }
-
- while (colors_written) {
- i = u_bit_scan(&colors_written);
- if (export_16bpc & (1 << i))
- spi_shader_col_format |= V_028714_SPI_SHADER_FP16_ABGR << (4 * i);
- else
- spi_shader_col_format |= V_028714_SPI_SHADER_32_ABGR << (4 * i);
- cb_shader_mask |= 0xf << (4 * i);
- }
+ /* This must be non-zero for alpha-test/kill to work.
+ * The hardware ignores the EXEC mask if no export memory is allocated.
+ * Don't add this to CB_SHADER_MASK.
+ */
+ if (!spi_shader_col_format &&
+ !info->writes_z && !info->writes_stencil && !info->writes_samplemask &&
+ (shader->selector->info.uses_kill ||
+ shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS))
+ spi_shader_col_format = V_028714_SPI_SHADER_32_R;
/* Set interpolation controls. */
has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) ||
@@ -498,7 +560,7 @@ static void si_shader_init_pm4_state(struct si_shader *shader)
else if (shader->key.vs.as_es)
si_shader_es(shader);
else
- si_shader_vs(shader);
+ si_shader_vs(shader, NULL);
break;
case PIPE_SHADER_TESS_CTRL:
si_shader_hs(shader);
@@ -507,11 +569,11 @@ static void si_shader_init_pm4_state(struct si_shader *shader)
if (shader->key.tes.as_es)
si_shader_es(shader);
else
- si_shader_vs(shader);
+ si_shader_vs(shader, NULL);
break;
case PIPE_SHADER_GEOMETRY:
si_shader_gs(shader);
- si_shader_vs(shader->gs_copy_shader);
+ si_shader_vs(shader->gs_copy_shader, shader);
break;
case PIPE_SHADER_FRAGMENT:
si_shader_ps(shader);
@@ -571,12 +633,47 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
break;
case PIPE_SHADER_FRAGMENT: {
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+ struct si_state_blend *blend = sctx->queued.named.blend;
if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
sel->info.colors_written == 0x1)
key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
- key->ps.export_16bpc = sctx->framebuffer.export_16bpc;
+ if (blend) {
+ /* Select the shader color format based on whether
+ * blending or alpha are needed.
+ */
+ key->ps.spi_shader_col_format =
+ (blend->blend_enable_4bit & blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format_blend_alpha) |
+ (blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format_blend) |
+ (~blend->blend_enable_4bit & blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format_alpha) |
+ (~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format);
+ } else
+ key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
+
+ /* If alpha-to-coverage is enabled, we have to export alpha
+ * even if there is no color buffer.
+ */
+ if (!(key->ps.spi_shader_col_format & 0xf) &&
+ blend && blend->alpha_to_coverage)
+ key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
+
+ /* On SI and CIK except Hawaii, the CB doesn't clamp outputs
+ * to the range supported by the type if a channel has less
+ * than 16 bits and the export format is 16_ABGR.
+ */
+ if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII)
+ key->ps.color_is_int8 = sctx->framebuffer.color_is_int8;
+
+ /* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */
+ if (!key->ps.last_cbuf) {
+ key->ps.spi_shader_col_format &= sel->colors_written_4bit;
+ key->ps.color_is_int8 &= sel->info.colors_written;
+ }
if (rs) {
bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES &&
@@ -762,6 +859,12 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
}
sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
break;
+
+ case PIPE_SHADER_FRAGMENT:
+ for (i = 0; i < 8; i++)
+ if (sel->info.colors_written & (1 << i))
+ sel->colors_written_4bit |= 0xf << (4 * i);
+ break;
}
/* DB_SHADER_CONTROL */
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 573ab78b482..9e1e158219f 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -2062,12 +2062,12 @@
#define V_008F14_IMG_DATA_FORMAT_8_24 0x14
#define V_008F14_IMG_DATA_FORMAT_24_8 0x15
#define V_008F14_IMG_DATA_FORMAT_X24_8_32 0x16
-#define V_008F14_IMG_DATA_FORMAT_RESERVED_23 0x17
-#define V_008F14_IMG_DATA_FORMAT_RESERVED_24 0x18
-#define V_008F14_IMG_DATA_FORMAT_RESERVED_25 0x19
-#define V_008F14_IMG_DATA_FORMAT_RESERVED_26 0x1A
-#define V_008F14_IMG_DATA_FORMAT_RESERVED_27 0x1B
-#define V_008F14_IMG_DATA_FORMAT_RESERVED_28 0x1C
+#define V_008F14_IMG_DATA_FORMAT_8_AS_8_8_8_8 0x17 /* stoney+ */
+#define V_008F14_IMG_DATA_FORMAT_ETC2_RGB 0x18 /* stoney+ */
+#define V_008F14_IMG_DATA_FORMAT_ETC2_RGBA 0x19 /* stoney+ */
+#define V_008F14_IMG_DATA_FORMAT_ETC2_R 0x1A /* stoney+ */
+#define V_008F14_IMG_DATA_FORMAT_ETC2_RG 0x1B /* stoney+ */
+#define V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1 0x1C /* stoney+ */
#define V_008F14_IMG_DATA_FORMAT_RESERVED_29 0x1D
#define V_008F14_IMG_DATA_FORMAT_RESERVED_30 0x1E
#define V_008F14_IMG_DATA_FORMAT_RESERVED_31 0x1F
@@ -2081,8 +2081,8 @@
#define V_008F14_IMG_DATA_FORMAT_BC5 0x27
#define V_008F14_IMG_DATA_FORMAT_BC6 0x28
#define V_008F14_IMG_DATA_FORMAT_BC7 0x29
-#define V_008F14_IMG_DATA_FORMAT_RESERVED_42 0x2A
-#define V_008F14_IMG_DATA_FORMAT_RESERVED_43 0x2B
+#define V_008F14_IMG_DATA_FORMAT_16_AS_16_16_16_16 0x2A /* stoney+ */
+#define V_008F14_IMG_DATA_FORMAT_16_AS_32_32_32_32 0x2B /* stoney+ */
#define V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F1 0x2C
#define V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F1 0x2D
#define V_008F14_IMG_DATA_FORMAT_FMASK8_S8_F1 0x2E
@@ -2100,8 +2100,8 @@
#define V_008F14_IMG_DATA_FORMAT_6_5_5 0x3A
#define V_008F14_IMG_DATA_FORMAT_1 0x3B
#define V_008F14_IMG_DATA_FORMAT_1_REVERSED 0x3C
-#define V_008F14_IMG_DATA_FORMAT_32_AS_8 0x3D
-#define V_008F14_IMG_DATA_FORMAT_32_AS_8_8 0x3E
+#define V_008F14_IMG_DATA_FORMAT_32_AS_8 0x3D /* not on stoney */
+#define V_008F14_IMG_DATA_FORMAT_32_AS_8_8 0x3E /* not on stoney */
#define V_008F14_IMG_DATA_FORMAT_32_AS_32_32_32_32 0x3F
#define S_008F14_NUM_FORMAT(x) (((x) & 0x0F) << 26)
#define G_008F14_NUM_FORMAT(x) (((x) >> 26) & 0x0F)
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index b5ab9249835..6e703f76499 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -119,7 +119,22 @@ trace_context_draw_vbo(struct pipe_context *_pipe,
trace_dump_trace_flush();
- pipe->draw_vbo(pipe, info);
+ if (info->indirect) {
+ struct pipe_draw_info *_info = NULL;
+
+ _info = MALLOC(sizeof(*_info));
+ if (!_info)
+ return;
+
+ memcpy(_info, info, sizeof(*_info));
+ _info->indirect = trace_resource_unwrap(tr_ctx, _info->indirect);
+ _info->indirect_params = trace_resource_unwrap(tr_ctx,
+ _info->indirect_params);
+ pipe->draw_vbo(pipe, _info);
+ FREE(_info);
+ } else {
+ pipe->draw_vbo(pipe, info);
+ }
trace_dump_call_end();
}
diff --git a/src/gallium/drivers/virgl/Android.mk b/src/gallium/drivers/virgl/Android.mk
new file mode 100644
index 00000000000..b8309e43d71
--- /dev/null
+++ b/src/gallium/drivers/virgl/Android.mk
@@ -0,0 +1,35 @@
+# Copyright (C) 2014 Emil Velikov <[email protected]>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Android build rules for the virgl gallium driver sources in this directory.
+# The sources listed in Makefile.sources (C_SOURCES) are built into the
+# static library libmesa_pipe_virgl.
+
+LOCAL_PATH := $(call my-dir)
+
+# get C_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+ $(C_SOURCES)
+
+# libdrm is the only shared library dependency declared here.
+LOCAL_SHARED_LIBRARIES := libdrm
+LOCAL_MODULE := libmesa_pipe_virgl
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk
index 2d9610ee9ab..d4030852943 100644
--- a/src/gallium/targets/dri/Android.mk
+++ b/src/gallium/targets/dri/Android.mk
@@ -92,6 +92,10 @@ ifneq ($(filter vc4,$(MESA_GPU_DRIVERS)),)
LOCAL_CFLAGS += -DGALLIUM_VC4
gallium_DRIVERS += libmesa_winsys_vc4 libmesa_pipe_vc4
endif
+ifneq ($(filter virgl,$(MESA_GPU_DRIVERS)),)
+LOCAL_CFLAGS += -DGALLIUM_VIRGL
+gallium_DRIVERS += libmesa_winsys_virgl libmesa_pipe_virgl
+endif
ifneq ($(filter vmwgfx,$(MESA_GPU_DRIVERS)),)
gallium_DRIVERS += libmesa_winsys_svga libmesa_pipe_svga
LOCAL_CFLAGS += -DGALLIUM_VMWGFX
@@ -100,7 +104,7 @@ ifneq ($(filter nouveau r600g,$(MESA_GPU_DRIVERS)),)
LOCAL_SHARED_LIBRARIES += $(if $(filter true,$(MESA_LOLLIPOP_BUILD)),libc++,libstlport)
endif
-LOCAL_STATIC_LIBRARIES := \
+LOCAL_WHOLE_STATIC_LIBRARIES := \
$(gallium_DRIVERS) \
libmesa_st_dri \
libmesa_st_mesa \
@@ -112,6 +116,8 @@ LOCAL_STATIC_LIBRARIES := \
libmesa_util \
libmesa_loader \
+LOCAL_STATIC_LIBRARIES :=
+
ifeq ($(MESA_ENABLE_LLVM),true)
LOCAL_STATIC_LIBRARIES += \
libLLVMR600CodeGen \
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 82c803b564d..30a1aa8d6ba 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -128,6 +128,11 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
int i;
+ pipe_mutex_lock(bo->ws->global_bo_list_lock);
+ LIST_DEL(&bo->global_list_item);
+ bo->ws->num_buffers--;
+ pipe_mutex_unlock(bo->ws->global_bo_list_lock);
+
amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
amdgpu_va_range_free(bo->va_handle);
amdgpu_bo_free(bo->bo);
@@ -249,6 +254,16 @@ static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
/* other functions are never called */
};
+static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
+{
+ struct amdgpu_winsys *ws = bo->ws;
+ /* Append bo to ws->global_bo_list and count it; amdgpu_bo_destroy undoes both under the same lock. */
+ pipe_mutex_lock(ws->global_bo_list_lock);
+ LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list);
+ ws->num_buffers++;
+ pipe_mutex_unlock(ws->global_bo_list_lock);
+}
+
static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
unsigned size,
unsigned alignment,
@@ -319,6 +334,8 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
else if (initial_domain & RADEON_DOMAIN_GTT)
ws->allocated_gtt += align(size, ws->gart_page_size);
+ amdgpu_add_buffer_to_global_list(bo);
+
return bo;
error_va_map:
@@ -588,6 +605,8 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
ws->allocated_gtt += align(bo->base.size, ws->gart_page_size);
+ amdgpu_add_buffer_to_global_list(bo);
+
return &bo->base;
error_va_map:
@@ -673,6 +692,8 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
ws->allocated_gtt += align(bo->base.size, ws->gart_page_size);
+ amdgpu_add_buffer_to_global_list(bo);
+
return (struct pb_buffer*)bo;
error_va_map:
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 12cb920b387..54f5dbdc459 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -60,6 +60,8 @@ struct amdgpu_winsys_bo {
/* Fences for buffer synchronization. */
struct pipe_fence_handle *fence[RING_LAST];
+
+ struct list_head global_list_item;
};
bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 10f112d01b3..83da740f649 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -605,6 +605,7 @@ static void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs)
}
DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", FALSE)
static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
unsigned flags,
@@ -644,9 +645,35 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
int r;
- r = amdgpu_bo_list_create(ws->dev, cs->num_buffers,
- cs->handles, cs->flags,
- &cs->request.resources);
+ /* Use a buffer list containing all allocated buffers if requested. */
+ if (debug_get_option_all_bos()) {
+ struct amdgpu_winsys_bo *bo;
+ amdgpu_bo_handle *handles;
+ unsigned num = 0;
+
+ pipe_mutex_lock(ws->global_bo_list_lock);
+
+ handles = malloc(sizeof(handles[0]) * ws->num_buffers);
+ if (!handles) {
+ pipe_mutex_unlock(ws->global_bo_list_lock);
+ goto cleanup;
+ }
+
+ LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) {
+ assert(num < ws->num_buffers);
+ handles[num++] = bo->bo;
+ }
+
+ r = amdgpu_bo_list_create(ws->dev, ws->num_buffers,
+ handles, NULL,
+ &cs->request.resources);
+ free(handles);
+ pipe_mutex_unlock(ws->global_bo_list_lock);
+ } else {
+ r = amdgpu_bo_list_create(ws->dev, cs->num_buffers,
+ cs->handles, cs->flags,
+ &cs->request.resources);
+ }
if (r) {
fprintf(stderr, "amdgpu: resource list creation failed (%d)\n", r);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 39d3aa4f783..7393a1d1eb4 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -266,17 +266,12 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
ws->info.r600_virtual_address = TRUE;
ws->info.r600_has_dma = dma.available_rings != 0;
- /* Guess what the maximum compute unit number is by looking at the mask
- * of enabled CUs.
- */
+ /* Get the number of good compute units. */
+ ws->info.num_good_compute_units = 0;
for (i = 0; i < ws->info.max_se; i++)
- for (j = 0; j < ws->info.max_sh_per_se; j++) {
- unsigned max = util_last_bit(ws->amdinfo.cu_bitmap[i][j]);
-
- if (ws->info.max_compute_units < max)
- ws->info.max_compute_units = max;
- }
- ws->info.max_compute_units *= ws->info.max_se * ws->info.max_sh_per_se;
+ for (j = 0; j < ws->info.max_sh_per_se; j++)
+ ws->info.num_good_compute_units +=
+ util_bitcount(ws->amdinfo.cu_bitmap[i][j]);
memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
sizeof(ws->amdinfo.gb_tile_mode));
@@ -305,6 +300,7 @@ static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
pipe_mutex_destroy(ws->bo_fence_lock);
pb_cache_deinit(&ws->bo_cache);
+ pipe_mutex_destroy(ws->global_bo_list_lock);
AddrDestroy(ws->addrlib);
amdgpu_device_deinitialize(ws->dev);
FREE(rws);
@@ -477,6 +473,8 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
amdgpu_cs_init_functions(ws);
amdgpu_surface_init_functions(ws);
+ LIST_INITHEAD(&ws->global_bo_list);
+ pipe_mutex_init(ws->global_bo_list_lock);
pipe_mutex_init(ws->bo_fence_lock);
/* Create the screen at the end. The winsys must be initialized
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
index 615f55411f8..91b9be4bb32 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
@@ -63,6 +63,11 @@ struct amdgpu_winsys {
ADDR_HANDLE addrlib;
uint32_t rev_id;
unsigned family;
+
+ /* List of all allocated buffers */
+ pipe_mutex global_bo_list_lock;
+ struct list_head global_bo_list;
+ unsigned num_buffers;
};
static inline struct amdgpu_winsys *
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index c7e058bf3da..8a1ed3ae08c 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -419,9 +419,9 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
&ws->info.r600_max_pipes);
/* All GPUs have at least one compute unit */
- ws->info.max_compute_units = 1;
+ ws->info.num_good_compute_units = 1;
radeon_get_drm_value(ws->fd, RADEON_INFO_ACTIVE_CU_COUNT, NULL,
- &ws->info.max_compute_units);
+ &ws->info.num_good_compute_units);
radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL,
&ws->info.max_se);
diff --git a/src/gallium/winsys/virgl/drm/Android.mk b/src/gallium/winsys/virgl/drm/Android.mk
new file mode 100644
index 00000000000..849350343f6
--- /dev/null
+++ b/src/gallium/winsys/virgl/drm/Android.mk
@@ -0,0 +1,34 @@
+# Copyright (C) 2014 Emil Velikov <[email protected]>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# Makefile.sources under LOCAL_PATH provides the C_SOURCES list used below.
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+# Every file listed in C_SOURCES goes into the libmesa_winsys_virgl module.
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_SHARED_LIBRARIES := libdrm
+LOCAL_MODULE := libmesa_winsys_virgl
+# targets/dri/Android.mk adds this module to gallium_DRIVERS when virgl is in MESA_GPU_DRIVERS.
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)