summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/auxiliary/Makefile.sources4
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c330
-rw-r--r--src/gallium/auxiliary/nir/tgsi_to_nir.c12
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c757
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_transform.h2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c6
-rw-r--r--src/gallium/auxiliary/util/u_debug.c468
-rw-r--r--src/gallium/auxiliary/util/u_debug.h39
-rw-r--r--src/gallium/auxiliary/util/u_debug_image.c348
-rw-r--r--src/gallium/auxiliary/util/u_debug_image.h74
-rw-r--r--src/gallium/auxiliary/util/u_debug_stack.c61
-rw-r--r--src/gallium/auxiliary/util/u_pstipple.c4
-rw-r--r--src/gallium/auxiliary/util/u_pstipple.h5
-rw-r--r--src/gallium/auxiliary/util/u_staging.c50
-rw-r--r--src/gallium/auxiliary/util/u_staging.h24
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c10
-rw-r--r--src/gallium/drivers/ilo/ilo_draw.c2
-rw-r--r--src/gallium/drivers/ilo/ilo_query.c9
-rw-r--r--src/gallium/drivers/ilo/ilo_render.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.c1
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_query.c7
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw.c6
-rw-r--r--src/gallium/drivers/r600/evergreen_compute.c2
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm_emit.c13
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm_emit.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_compute.c3
-rw-r--r--src/gallium/drivers/radeonsi/si_hw_context.c1
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h2
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c377
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h58
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c308
-rw-r--r--src/gallium/drivers/radeonsi/sid.h3
-rw-r--r--src/gallium/drivers/softpipe/sp_flush.c1
-rw-r--r--src/gallium/drivers/svga/svga_pipe_flush.c1
-rw-r--r--src/gallium/drivers/trace/tr_context.c45
-rw-r--r--src/gallium/include/pipe/p_defines.h6
-rw-r--r--src/gallium/targets/graw-null/graw_util.c1
-rw-r--r--src/gallium/tests/graw/graw_util.h1
-rw-r--r--src/gallium/tests/trivial/quad-tex.c2
-rw-r--r--src/gallium/tests/trivial/tri.c2
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_winsys.c6
42 files changed, 1486 insertions, 1569 deletions
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 6f50f714c3f..84da85c5b96 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -191,11 +191,13 @@ C_SOURCES := \
util/u_cpu_detect.c \
util/u_cpu_detect.h \
util/u_debug.c \
+ util/u_debug.h \
util/u_debug_describe.c \
util/u_debug_describe.h \
util/u_debug_flush.c \
util/u_debug_flush.h \
- util/u_debug.h \
+ util/u_debug_image.c \
+ util/u_debug_image.h \
util/u_debug_memory.c \
util/u_debug_refcnt.c \
util/u_debug_refcnt.h \
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index cf52ca48b26..0298334a28f 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -43,10 +43,10 @@
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_pstipple.h"
#include "util/u_sampler.h"
#include "tgsi/tgsi_transform.h"
-#include "tgsi/tgsi_dump.h"
#include "draw_context.h"
#include "draw_pipe.h"
@@ -114,178 +114,6 @@ struct pstip_stage
};
-
-/**
- * Subclass of tgsi_transform_context, used for transforming the
- * user's fragment shader to add the extra texture sample and fragment kill
- * instructions.
- */
-struct pstip_transform_context {
- struct tgsi_transform_context base;
- uint tempsUsed; /**< bitmask */
- int wincoordInput;
- int maxInput;
- uint samplersUsed; /**< bitfield of samplers used */
- bool hasSview;
- int freeSampler; /** an available sampler for the pstipple */
- int texTemp; /**< temp registers */
- int numImmed;
-};
-
-
-/**
- * TGSI declaration transform callback.
- * Look for a free sampler, a free input attrib, and two free temp regs.
- */
-static void
-pstip_transform_decl(struct tgsi_transform_context *ctx,
- struct tgsi_full_declaration *decl)
-{
- struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx;
-
- if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
- uint i;
- for (i = decl->Range.First;
- i <= decl->Range.Last; i++) {
- pctx->samplersUsed |= 1 << i;
- }
- }
- else if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
- pctx->hasSview = true;
- }
- else if (decl->Declaration.File == TGSI_FILE_INPUT) {
- pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last);
- if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
- pctx->wincoordInput = (int) decl->Range.First;
- }
- else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
- uint i;
- for (i = decl->Range.First;
- i <= decl->Range.Last; i++) {
- pctx->tempsUsed |= (1 << i);
- }
- }
-
- ctx->emit_declaration(ctx, decl);
-}
-
-
-/**
- * TGSI immediate declaration transform callback.
- * We're just counting the number of immediates here.
- */
-static void
-pstip_transform_immed(struct tgsi_transform_context *ctx,
- struct tgsi_full_immediate *immed)
-{
- struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx;
- ctx->emit_immediate(ctx, immed); /* emit to output shader */
- pctx->numImmed++;
-}
-
-
-/**
- * Find the lowest zero bit in the given word, or -1 if bitfield is all ones.
- */
-static int
-free_bit(uint bitfield)
-{
- return ffs(~bitfield) - 1;
-}
-
-
-/**
- * TGSI transform prolog callback.
- */
-static void
-pstip_transform_prolog(struct tgsi_transform_context *ctx)
-{
- struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx;
- uint i;
- int wincoordInput;
-
- /* find free sampler */
- pctx->freeSampler = free_bit(pctx->samplersUsed);
- if (pctx->freeSampler >= PIPE_MAX_SAMPLERS)
- pctx->freeSampler = PIPE_MAX_SAMPLERS - 1;
-
- if (pctx->wincoordInput < 0)
- wincoordInput = pctx->maxInput + 1;
- else
- wincoordInput = pctx->wincoordInput;
-
- /* find one free temp reg */
- for (i = 0; i < 32; i++) {
- if ((pctx->tempsUsed & (1 << i)) == 0) {
- /* found a free temp */
- if (pctx->texTemp < 0)
- pctx->texTemp = i;
- else
- break;
- }
- }
- assert(pctx->texTemp >= 0);
-
- if (pctx->wincoordInput < 0) {
- /* declare new position input reg */
- tgsi_transform_input_decl(ctx, wincoordInput,
- TGSI_SEMANTIC_POSITION, 1,
- TGSI_INTERPOLATE_LINEAR);
- }
-
- /* declare new sampler */
- tgsi_transform_sampler_decl(ctx, pctx->freeSampler);
-
- /* if the src shader has SVIEW decl's for each SAMP decl, we
- * need to continue the trend and ensure there is a matching
- * SVIEW for the new SAMP we just created
- */
- if (pctx->hasSview) {
- tgsi_transform_sampler_view_decl(ctx,
- pctx->freeSampler,
- TGSI_TEXTURE_2D,
- TGSI_RETURN_TYPE_FLOAT);
- }
-
- /* declare new temp regs */
- tgsi_transform_temp_decl(ctx, pctx->texTemp);
-
- /* emit immediate = {1/32, 1/32, 1, 1}
- * The index/position of this immediate will be pctx->numImmed
- */
- tgsi_transform_immediate_decl(ctx, 1.0/32.0, 1.0/32.0, 1.0, 1.0);
-
- /*
- * Insert new MUL/TEX/KILL_IF instructions at start of program
- * Take gl_FragCoord, divide by 32 (stipple size), sample the
- * texture and kill fragment if needed.
- *
- * We'd like to use non-normalized texcoords to index into a RECT
- * texture, but we can only use GL_REPEAT wrap mode with normalized
- * texcoords. Darn.
- */
-
- /* MUL texTemp, INPUT[wincoord], 1/32; */
- tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
- TGSI_FILE_TEMPORARY, pctx->texTemp,
- TGSI_WRITEMASK_XYZW,
- TGSI_FILE_INPUT, wincoordInput,
- TGSI_FILE_IMMEDIATE, pctx->numImmed);
-
- /* TEX texTemp, texTemp, sampler; */
- tgsi_transform_tex_2d_inst(ctx,
- TGSI_FILE_TEMPORARY, pctx->texTemp,
- TGSI_FILE_TEMPORARY, pctx->texTemp,
- pctx->freeSampler);
-
- /* KILL_IF -texTemp.wwww; # if -texTemp < 0, KILL fragment */
- tgsi_transform_kill_inst(ctx,
- TGSI_FILE_TEMPORARY, pctx->texTemp,
- TGSI_SWIZZLE_W, TRUE);
-}
-
-
-
/**
* Generate the frag shader we'll use for doing polygon stipple.
* This will be the user's shader prefixed with a TEX and KIL instruction.
@@ -293,40 +121,27 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
static boolean
generate_pstip_fs(struct pstip_stage *pstip)
{
+ struct pipe_context *pipe = pstip->pipe;
+ struct pipe_screen *screen = pipe->screen;
const struct pipe_shader_state *orig_fs = &pstip->fs->state;
/*struct draw_context *draw = pstip->stage.draw;*/
struct pipe_shader_state pstip_fs;
- struct pstip_transform_context transform;
- const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
+ enum tgsi_file_type wincoord_file;
+
+ wincoord_file = screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL) ?
+ TGSI_FILE_SYSTEM_VALUE : TGSI_FILE_INPUT;
pstip_fs = *orig_fs; /* copy to init */
- pstip_fs.tokens = tgsi_alloc_tokens(newLen);
+ pstip_fs.tokens = util_pstipple_create_fragment_shader(orig_fs->tokens,
+ &pstip->fs->sampler_unit,
+ 0,
+ wincoord_file);
if (pstip_fs.tokens == NULL)
return FALSE;
- memset(&transform, 0, sizeof(transform));
- transform.wincoordInput = -1;
- transform.maxInput = -1;
- transform.texTemp = -1;
- transform.base.prolog = pstip_transform_prolog;
- transform.base.transform_declaration = pstip_transform_decl;
- transform.base.transform_immediate = pstip_transform_immed;
-
- tgsi_transform_shader(orig_fs->tokens,
- (struct tgsi_token *) pstip_fs.tokens,
- newLen, &transform.base);
-
-#if 0 /* DEBUG */
- tgsi_dump(orig_fs->tokens, 0);
- tgsi_dump(pstip_fs.tokens, 0);
-#endif
-
- assert(pstip->fs);
-
- pstip->fs->sampler_unit = transform.freeSampler;
assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS);
- pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs);
+ pstip->fs->pstip_fs = pstip->driver_create_fs_state(pipe, &pstip_fs);
FREE((void *)pstip_fs.tokens);
@@ -338,113 +153,6 @@ generate_pstip_fs(struct pstip_stage *pstip)
/**
- * Load texture image with current stipple pattern.
- */
-static void
-pstip_update_texture(struct pstip_stage *pstip)
-{
- static const uint bit31 = 1 << 31;
- struct pipe_context *pipe = pstip->pipe;
- struct pipe_transfer *transfer;
- const uint *stipple = pstip->state.stipple->stipple;
- uint i, j;
- ubyte *data;
-
- data = pipe_transfer_map(pipe, pstip->texture, 0, 0,
- PIPE_TRANSFER_WRITE, 0, 0, 32, 32, &transfer);
-
- /*
- * Load alpha texture.
- * Note: 0 means keep the fragment, 255 means kill it.
- * We'll negate the texel value and use KILL_IF which kills if value
- * is negative.
- */
- for (i = 0; i < 32; i++) {
- for (j = 0; j < 32; j++) {
- if (stipple[i] & (bit31 >> j)) {
- /* fragment "on" */
- data[i * transfer->stride + j] = 0;
- }
- else {
- /* fragment "off" */
- data[i * transfer->stride + j] = 255;
- }
- }
- }
-
- /* unmap */
- pipe_transfer_unmap(pipe, transfer);
-}
-
-
-/**
- * Create the texture map we'll use for stippling.
- */
-static boolean
-pstip_create_texture(struct pstip_stage *pstip)
-{
- struct pipe_context *pipe = pstip->pipe;
- struct pipe_screen *screen = pipe->screen;
- struct pipe_resource texTemp;
- struct pipe_sampler_view viewTempl;
-
- memset(&texTemp, 0, sizeof(texTemp));
- texTemp.target = PIPE_TEXTURE_2D;
- texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
- texTemp.last_level = 0;
- texTemp.width0 = 32;
- texTemp.height0 = 32;
- texTemp.depth0 = 1;
- texTemp.array_size = 1;
- texTemp.bind = PIPE_BIND_SAMPLER_VIEW;
-
- pstip->texture = screen->resource_create(screen, &texTemp);
- if (pstip->texture == NULL)
- return FALSE;
-
- u_sampler_view_default_template(&viewTempl,
- pstip->texture,
- pstip->texture->format);
- pstip->sampler_view = pipe->create_sampler_view(pipe,
- pstip->texture,
- &viewTempl);
- if (!pstip->sampler_view) {
- return FALSE;
- }
-
- return TRUE;
-}
-
-
-/**
- * Create the sampler CSO that'll be used for stippling.
- */
-static boolean
-pstip_create_sampler(struct pstip_stage *pstip)
-{
- struct pipe_sampler_state sampler;
- struct pipe_context *pipe = pstip->pipe;
-
- memset(&sampler, 0, sizeof(sampler));
- sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
- sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
- sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
- sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
- sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
- sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
- sampler.normalized_coords = 1;
- sampler.min_lod = 0.0f;
- sampler.max_lod = 0.0f;
-
- pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
- if (pstip->sampler_cso == NULL)
- return FALSE;
-
- return TRUE;
-}
-
-
-/**
* When we're about to draw our first stipple polygon in a batch, this function
* is called to tell the driver to bind our modified fragment shader.
*/
@@ -722,7 +430,8 @@ pstip_set_polygon_stipple(struct pipe_context *pipe,
/* pass-through */
pstip->driver_set_polygon_stipple(pstip->pipe, stipple);
- pstip_update_texture(pstip);
+ util_pstipple_update_stipple_texture(pstip->pipe, pstip->texture,
+ pstip->state.stipple->stipple);
}
@@ -758,10 +467,17 @@ draw_install_pstipple_stage(struct draw_context *draw,
pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple;
/* create special texture, sampler state */
- if (!pstip_create_texture(pstip))
+ pstip->texture = util_pstipple_create_stipple_texture(pipe, NULL);
+ if (!pstip->texture)
+ goto fail;
+
+ pstip->sampler_view = util_pstipple_create_sampler_view(pipe,
+ pstip->texture);
+ if (!pstip->sampler_view)
goto fail;
- if (!pstip_create_sampler(pstip))
+ pstip->sampler_cso = util_pstipple_create_sampler(pipe);
+ if (!pstip->sampler_cso)
goto fail;
/* override the driver's functions */
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 3e7d69f73ed..61ff0a74379 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -614,8 +614,8 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
}
nir_ssa_def *offset;
- if (dim) {
- /* UBO loads don't have a const_index[0] base offset. */
+ if (op == nir_intrinsic_load_ubo) {
+ /* UBO loads don't have a base offset. */
offset = nir_imm_int(b, index);
if (indirect) {
offset = nir_iadd(b, offset, ttn_src_for_indirect(c, indirect));
@@ -623,7 +623,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
/* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
offset = nir_ishl(b, offset, nir_imm_int(b, 4));
} else {
- load->const_index[0] = index;
+ nir_intrinsic_set_base(load, index);
if (indirect) {
offset = ttn_src_for_indirect(c, indirect);
} else {
@@ -1875,7 +1875,7 @@ ttn_emit_instruction(struct ttn_compile *c)
&tgsi_dst->Indirect : NULL;
store->num_components = 4;
- store->const_index[0] = dest.write_mask;
+ nir_intrinsic_set_write_mask(store, dest.write_mask);
store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
store->src[0] = nir_src_for_reg(dest.dest.reg.reg);
@@ -1907,8 +1907,8 @@ ttn_add_output_stores(struct ttn_compile *c)
store->num_components = 4;
store->src[0].reg.reg = c->output_regs[loc].reg;
store->src[0].reg.base_offset = c->output_regs[loc].offset;
- store->const_index[0] = loc;
- store->const_index[1] = 0xf; /* writemask */
+ nir_intrinsic_set_base(store, loc);
+ nir_intrinsic_set_write_mask(store, 0xf);
store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_builder_instr_insert(b, &store->instr);
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 687fb54830d..489423d7f12 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -44,6 +44,387 @@
+static void
+scan_instruction(struct tgsi_shader_info *info,
+ const struct tgsi_full_instruction *fullinst,
+ unsigned *current_depth)
+{
+ unsigned i;
+
+ assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
+ info->opcode_count[fullinst->Instruction.Opcode]++;
+
+ switch (fullinst->Instruction.Opcode) {
+ case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_UIF:
+ case TGSI_OPCODE_BGNLOOP:
+ (*current_depth)++;
+ info->max_depth = MAX2(info->max_depth, *current_depth);
+ break;
+ case TGSI_OPCODE_ENDIF:
+ case TGSI_OPCODE_ENDLOOP:
+ (*current_depth)--;
+ break;
+ default:
+ break;
+ }
+
+ if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID ||
+ fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+ fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+ const struct tgsi_full_src_register *src0 = &fullinst->Src[0];
+ unsigned input;
+
+ if (src0->Register.Indirect && src0->Indirect.ArrayID)
+ input = info->input_array_first[src0->Indirect.ArrayID];
+ else
+ input = src0->Register.Index;
+
+ /* For the INTERP opcodes, the interpolation is always
+ * PERSPECTIVE unless LINEAR is specified.
+ */
+ switch (info->input_interpolate[input]) {
+ case TGSI_INTERPOLATE_COLOR:
+ case TGSI_INTERPOLATE_CONSTANT:
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ switch (fullinst->Instruction.Opcode) {
+ case TGSI_OPCODE_INTERP_CENTROID:
+ info->uses_persp_opcode_interp_centroid = TRUE;
+ break;
+ case TGSI_OPCODE_INTERP_OFFSET:
+ info->uses_persp_opcode_interp_offset = TRUE;
+ break;
+ case TGSI_OPCODE_INTERP_SAMPLE:
+ info->uses_persp_opcode_interp_sample = TRUE;
+ break;
+ }
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ switch (fullinst->Instruction.Opcode) {
+ case TGSI_OPCODE_INTERP_CENTROID:
+ info->uses_linear_opcode_interp_centroid = TRUE;
+ break;
+ case TGSI_OPCODE_INTERP_OFFSET:
+ info->uses_linear_opcode_interp_offset = TRUE;
+ break;
+ case TGSI_OPCODE_INTERP_SAMPLE:
+ info->uses_linear_opcode_interp_sample = TRUE;
+ break;
+ }
+ break;
+ }
+ }
+
+ if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
+ fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
+ info->uses_doubles = TRUE;
+
+ for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *src = &fullinst->Src[i];
+ int ind = src->Register.Index;
+
+ /* Mark which inputs are effectively used */
+ if (src->Register.File == TGSI_FILE_INPUT) {
+ unsigned usage_mask;
+ usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i);
+ if (src->Register.Indirect) {
+ for (ind = 0; ind < info->num_inputs; ++ind) {
+ info->input_usage_mask[ind] |= usage_mask;
+ }
+ } else {
+ assert(ind >= 0);
+ assert(ind < PIPE_MAX_SHADER_INPUTS);
+ info->input_usage_mask[ind] |= usage_mask;
+ }
+
+ if (info->processor == TGSI_PROCESSOR_FRAGMENT &&
+ !src->Register.Indirect) {
+ unsigned name =
+ info->input_semantic_name[src->Register.Index];
+ unsigned index =
+ info->input_semantic_index[src->Register.Index];
+
+ if (name == TGSI_SEMANTIC_POSITION &&
+ (src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
+ src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
+ src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
+ src->Register.SwizzleW == TGSI_SWIZZLE_Z))
+ info->reads_z = TRUE;
+
+ if (name == TGSI_SEMANTIC_COLOR) {
+ unsigned mask =
+ (1 << src->Register.SwizzleX) |
+ (1 << src->Register.SwizzleY) |
+ (1 << src->Register.SwizzleZ) |
+ (1 << src->Register.SwizzleW);
+
+ info->colors_read |= mask << (index * 4);
+ }
+ }
+ }
+
+ /* check for indirect register reads */
+ if (src->Register.Indirect) {
+ info->indirect_files |= (1 << src->Register.File);
+ info->indirect_files_read |= (1 << src->Register.File);
+ }
+
+ /* MSAA samplers */
+ if (src->Register.File == TGSI_FILE_SAMPLER) {
+ assert(fullinst->Instruction.Texture);
+ assert(src->Register.Index < Elements(info->is_msaa_sampler));
+
+ if (fullinst->Instruction.Texture &&
+ (fullinst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
+ fullinst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {
+ info->is_msaa_sampler[src->Register.Index] = TRUE;
+ }
+ }
+ }
+
+ /* check for indirect register writes */
+ for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) {
+ const struct tgsi_full_dst_register *dst = &fullinst->Dst[i];
+ if (dst->Register.Indirect) {
+ info->indirect_files |= (1 << dst->Register.File);
+ info->indirect_files_written |= (1 << dst->Register.File);
+ }
+ }
+
+ info->num_instructions++;
+}
+
+
+static void
+scan_declaration(struct tgsi_shader_info *info,
+ const struct tgsi_full_declaration *fulldecl)
+{
+ const uint file = fulldecl->Declaration.File;
+ const unsigned procType = info->processor;
+ uint reg;
+
+ if (fulldecl->Declaration.Array) {
+ unsigned array_id = fulldecl->Array.ArrayID;
+
+ switch (file) {
+ case TGSI_FILE_INPUT:
+ assert(array_id < ARRAY_SIZE(info->input_array_first));
+ info->input_array_first[array_id] = fulldecl->Range.First;
+ info->input_array_last[array_id] = fulldecl->Range.Last;
+ break;
+ case TGSI_FILE_OUTPUT:
+ assert(array_id < ARRAY_SIZE(info->output_array_first));
+ info->output_array_first[array_id] = fulldecl->Range.First;
+ info->output_array_last[array_id] = fulldecl->Range.Last;
+ break;
+ }
+ info->array_max[file] = MAX2(info->array_max[file], array_id);
+ }
+
+ for (reg = fulldecl->Range.First; reg <= fulldecl->Range.Last; reg++) {
+ unsigned semName = fulldecl->Semantic.Name;
+ unsigned semIndex = fulldecl->Semantic.Index +
+ (reg - fulldecl->Range.First);
+
+ /* only first 32 regs will appear in this bitfield */
+ info->file_mask[file] |= (1 << reg);
+ info->file_count[file]++;
+ info->file_max[file] = MAX2(info->file_max[file], (int)reg);
+
+ if (file == TGSI_FILE_CONSTANT) {
+ int buffer = 0;
+
+ if (fulldecl->Declaration.Dimension)
+ buffer = fulldecl->Dim.Index2D;
+
+ info->const_file_max[buffer] =
+ MAX2(info->const_file_max[buffer], (int)reg);
+ }
+ else if (file == TGSI_FILE_INPUT) {
+ info->input_semantic_name[reg] = (ubyte) semName;
+ info->input_semantic_index[reg] = (ubyte) semIndex;
+ info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate;
+ info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location;
+ info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
+ info->num_inputs++;
+
+ /* Only interpolated varyings. Don't include POSITION.
+ * Don't include integer varyings, because they are not
+ * interpolated.
+ */
+ if (semName == TGSI_SEMANTIC_GENERIC ||
+ semName == TGSI_SEMANTIC_TEXCOORD ||
+ semName == TGSI_SEMANTIC_COLOR ||
+ semName == TGSI_SEMANTIC_BCOLOR ||
+ semName == TGSI_SEMANTIC_FOG ||
+ semName == TGSI_SEMANTIC_CLIPDIST ||
+ semName == TGSI_SEMANTIC_CULLDIST) {
+ switch (fulldecl->Interp.Interpolate) {
+ case TGSI_INTERPOLATE_COLOR:
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ switch (fulldecl->Interp.Location) {
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ info->uses_persp_center = TRUE;
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ info->uses_persp_centroid = TRUE;
+ break;
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ info->uses_persp_sample = TRUE;
+ break;
+ }
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ switch (fulldecl->Interp.Location) {
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ info->uses_linear_center = TRUE;
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ info->uses_linear_centroid = TRUE;
+ break;
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ info->uses_linear_sample = TRUE;
+ break;
+ }
+ break;
+ /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. */
+ }
+ }
+
+ if (semName == TGSI_SEMANTIC_PRIMID)
+ info->uses_primid = TRUE;
+ else if (procType == TGSI_PROCESSOR_FRAGMENT) {
+ if (semName == TGSI_SEMANTIC_POSITION)
+ info->reads_position = TRUE;
+ else if (semName == TGSI_SEMANTIC_FACE)
+ info->uses_frontface = TRUE;
+ }
+ }
+ else if (file == TGSI_FILE_SYSTEM_VALUE) {
+ unsigned index = fulldecl->Range.First;
+
+ info->system_value_semantic_name[index] = semName;
+ info->num_system_values = MAX2(info->num_system_values, index + 1);
+
+ switch (semName) {
+ case TGSI_SEMANTIC_INSTANCEID:
+ info->uses_instanceid = TRUE;
+ break;
+ case TGSI_SEMANTIC_VERTEXID:
+ info->uses_vertexid = TRUE;
+ break;
+ case TGSI_SEMANTIC_VERTEXID_NOBASE:
+ info->uses_vertexid_nobase = TRUE;
+ break;
+ case TGSI_SEMANTIC_BASEVERTEX:
+ info->uses_basevertex = TRUE;
+ break;
+ case TGSI_SEMANTIC_PRIMID:
+ info->uses_primid = TRUE;
+ break;
+ case TGSI_SEMANTIC_INVOCATIONID:
+ info->uses_invocationid = TRUE;
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ info->reads_position = TRUE;
+ break;
+ case TGSI_SEMANTIC_FACE:
+ info->uses_frontface = TRUE;
+ break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ info->reads_samplemask = TRUE;
+ break;
+ }
+ }
+ else if (file == TGSI_FILE_OUTPUT) {
+ info->output_semantic_name[reg] = (ubyte) semName;
+ info->output_semantic_index[reg] = (ubyte) semIndex;
+ info->num_outputs++;
+
+ if (semName == TGSI_SEMANTIC_COLOR)
+ info->colors_written |= 1 << semIndex;
+
+ if (procType == TGSI_PROCESSOR_VERTEX ||
+ procType == TGSI_PROCESSOR_GEOMETRY ||
+ procType == TGSI_PROCESSOR_TESS_CTRL ||
+ procType == TGSI_PROCESSOR_TESS_EVAL) {
+ switch (semName) {
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ info->writes_viewport_index = TRUE;
+ break;
+ case TGSI_SEMANTIC_LAYER:
+ info->writes_layer = TRUE;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ info->writes_psize = TRUE;
+ break;
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ info->writes_clipvertex = TRUE;
+ break;
+ }
+ }
+
+ if (procType == TGSI_PROCESSOR_FRAGMENT) {
+ switch (semName) {
+ case TGSI_SEMANTIC_POSITION:
+ info->writes_z = TRUE;
+ break;
+ case TGSI_SEMANTIC_STENCIL:
+ info->writes_stencil = TRUE;
+ break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ info->writes_samplemask = TRUE;
+ break;
+ }
+ }
+
+ if (procType == TGSI_PROCESSOR_VERTEX) {
+ if (semName == TGSI_SEMANTIC_EDGEFLAG) {
+ info->writes_edgeflag = TRUE;
+ }
+ }
+ } else if (file == TGSI_FILE_SAMPLER) {
+ info->samplers_declared |= 1 << reg;
+ }
+ }
+}
+
+
+static void
+scan_immediate(struct tgsi_shader_info *info)
+{
+ uint reg = info->immediate_count++;
+ uint file = TGSI_FILE_IMMEDIATE;
+
+ info->file_mask[file] |= (1 << reg);
+ info->file_count[file]++;
+ info->file_max[file] = MAX2(info->file_max[file], (int)reg);
+}
+
+
+static void
+scan_property(struct tgsi_shader_info *info,
+ const struct tgsi_full_property *fullprop)
+{
+ unsigned name = fullprop->Property.PropertyName;
+ unsigned value = fullprop->u[0].Data;
+
+ assert(name < Elements(info->properties));
+ info->properties[name] = value;
+
+ switch (name) {
+ case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
+ info->num_written_clipdistance = value;
+ info->clipdist_writemask |= (1 << value) - 1;
+ break;
+ case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
+ info->num_written_culldistance = value;
+ info->culldist_writemask |= (1 << value) - 1;
+ break;
+ }
+}
+
/**
* Scan the given TGSI shader to collect information such as number of
@@ -81,390 +462,30 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
procType == TGSI_PROCESSOR_COMPUTE);
info->processor = procType;
-
/**
** Loop over incoming program tokens/instructions
*/
- while( !tgsi_parse_end_of_tokens( &parse ) ) {
-
+ while (!tgsi_parse_end_of_tokens(&parse)) {
info->num_tokens++;
tgsi_parse_token( &parse );
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_INSTRUCTION:
- {
- const struct tgsi_full_instruction *fullinst
- = &parse.FullToken.FullInstruction;
- uint i;
-
- assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
- info->opcode_count[fullinst->Instruction.Opcode]++;
-
- switch (fullinst->Instruction.Opcode) {
- case TGSI_OPCODE_IF:
- case TGSI_OPCODE_UIF:
- case TGSI_OPCODE_BGNLOOP:
- current_depth++;
- info->max_depth = MAX2(info->max_depth, current_depth);
- break;
- case TGSI_OPCODE_ENDIF:
- case TGSI_OPCODE_ENDLOOP:
- current_depth--;
- break;
- default:
- break;
- }
-
- if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID ||
- fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
- fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
- const struct tgsi_full_src_register *src0 = &fullinst->Src[0];
- unsigned input;
-
- if (src0->Register.Indirect && src0->Indirect.ArrayID)
- input = info->input_array_first[src0->Indirect.ArrayID];
- else
- input = src0->Register.Index;
-
- /* For the INTERP opcodes, the interpolation is always
- * PERSPECTIVE unless LINEAR is specified.
- */
- switch (info->input_interpolate[input]) {
- case TGSI_INTERPOLATE_COLOR:
- case TGSI_INTERPOLATE_CONSTANT:
- case TGSI_INTERPOLATE_PERSPECTIVE:
- switch (fullinst->Instruction.Opcode) {
- case TGSI_OPCODE_INTERP_CENTROID:
- info->uses_persp_opcode_interp_centroid = true;
- break;
- case TGSI_OPCODE_INTERP_OFFSET:
- info->uses_persp_opcode_interp_offset = true;
- break;
- case TGSI_OPCODE_INTERP_SAMPLE:
- info->uses_persp_opcode_interp_sample = true;
- break;
- }
- break;
-
- case TGSI_INTERPOLATE_LINEAR:
- switch (fullinst->Instruction.Opcode) {
- case TGSI_OPCODE_INTERP_CENTROID:
- info->uses_linear_opcode_interp_centroid = true;
- break;
- case TGSI_OPCODE_INTERP_OFFSET:
- info->uses_linear_opcode_interp_offset = true;
- break;
- case TGSI_OPCODE_INTERP_SAMPLE:
- info->uses_linear_opcode_interp_sample = true;
- break;
- }
- break;
- }
- }
-
- if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
- fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
- info->uses_doubles = true;
-
- for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
- const struct tgsi_full_src_register *src =
- &fullinst->Src[i];
- int ind = src->Register.Index;
-
- /* Mark which inputs are effectively used */
- if (src->Register.File == TGSI_FILE_INPUT) {
- unsigned usage_mask;
- usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i);
- if (src->Register.Indirect) {
- for (ind = 0; ind < info->num_inputs; ++ind) {
- info->input_usage_mask[ind] |= usage_mask;
- }
- } else {
- assert(ind >= 0);
- assert(ind < PIPE_MAX_SHADER_INPUTS);
- info->input_usage_mask[ind] |= usage_mask;
- }
-
- if (procType == TGSI_PROCESSOR_FRAGMENT &&
- !src->Register.Indirect) {
- unsigned name =
- info->input_semantic_name[src->Register.Index];
- unsigned index =
- info->input_semantic_index[src->Register.Index];
-
- if (name == TGSI_SEMANTIC_POSITION &&
- (src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
- src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
- src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
- src->Register.SwizzleW == TGSI_SWIZZLE_Z))
- info->reads_z = TRUE;
-
- if (name == TGSI_SEMANTIC_COLOR) {
- unsigned mask =
- (1 << src->Register.SwizzleX) |
- (1 << src->Register.SwizzleY) |
- (1 << src->Register.SwizzleZ) |
- (1 << src->Register.SwizzleW);
-
- info->colors_read |= mask << (index * 4);
- }
- }
- }
-
- /* check for indirect register reads */
- if (src->Register.Indirect) {
- info->indirect_files |= (1 << src->Register.File);
- info->indirect_files_read |= (1 << src->Register.File);
- }
-
- /* MSAA samplers */
- if (src->Register.File == TGSI_FILE_SAMPLER) {
- assert(fullinst->Instruction.Texture);
- assert(src->Register.Index < Elements(info->is_msaa_sampler));
-
- if (fullinst->Instruction.Texture &&
- (fullinst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
- fullinst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {
- info->is_msaa_sampler[src->Register.Index] = TRUE;
- }
- }
- }
-
- /* check for indirect register writes */
- for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) {
- const struct tgsi_full_dst_register *dst = &fullinst->Dst[i];
- if (dst->Register.Indirect) {
- info->indirect_files |= (1 << dst->Register.File);
- info->indirect_files_written |= (1 << dst->Register.File);
- }
- }
-
- info->num_instructions++;
- }
+ scan_instruction(info, &parse.FullToken.FullInstruction,
+ &current_depth);
break;
-
case TGSI_TOKEN_TYPE_DECLARATION:
- {
- const struct tgsi_full_declaration *fulldecl
- = &parse.FullToken.FullDeclaration;
- const uint file = fulldecl->Declaration.File;
- uint reg;
-
- if (fulldecl->Declaration.Array) {
- unsigned array_id = fulldecl->Array.ArrayID;
-
- switch (file) {
- case TGSI_FILE_INPUT:
- assert(array_id < ARRAY_SIZE(info->input_array_first));
- info->input_array_first[array_id] = fulldecl->Range.First;
- info->input_array_last[array_id] = fulldecl->Range.Last;
- break;
- case TGSI_FILE_OUTPUT:
- assert(array_id < ARRAY_SIZE(info->output_array_first));
- info->output_array_first[array_id] = fulldecl->Range.First;
- info->output_array_last[array_id] = fulldecl->Range.Last;
- break;
- }
- info->array_max[file] = MAX2(info->array_max[file], array_id);
- }
-
- for (reg = fulldecl->Range.First;
- reg <= fulldecl->Range.Last;
- reg++) {
- unsigned semName = fulldecl->Semantic.Name;
- unsigned semIndex =
- fulldecl->Semantic.Index + (reg - fulldecl->Range.First);
-
- /* only first 32 regs will appear in this bitfield */
- info->file_mask[file] |= (1 << reg);
- info->file_count[file]++;
- info->file_max[file] = MAX2(info->file_max[file], (int)reg);
-
- if (file == TGSI_FILE_CONSTANT) {
- int buffer = 0;
-
- if (fulldecl->Declaration.Dimension)
- buffer = fulldecl->Dim.Index2D;
-
- info->const_file_max[buffer] =
- MAX2(info->const_file_max[buffer], (int)reg);
- }
- else if (file == TGSI_FILE_INPUT) {
- info->input_semantic_name[reg] = (ubyte) semName;
- info->input_semantic_index[reg] = (ubyte) semIndex;
- info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate;
- info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location;
- info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
- info->num_inputs++;
-
- /* Only interpolated varyings. Don't include POSITION.
- * Don't include integer varyings, because they are not
- * interpolated.
- */
- if (semName == TGSI_SEMANTIC_GENERIC ||
- semName == TGSI_SEMANTIC_TEXCOORD ||
- semName == TGSI_SEMANTIC_COLOR ||
- semName == TGSI_SEMANTIC_BCOLOR ||
- semName == TGSI_SEMANTIC_FOG ||
- semName == TGSI_SEMANTIC_CLIPDIST ||
- semName == TGSI_SEMANTIC_CULLDIST) {
- switch (fulldecl->Interp.Interpolate) {
- case TGSI_INTERPOLATE_COLOR:
- case TGSI_INTERPOLATE_PERSPECTIVE:
- switch (fulldecl->Interp.Location) {
- case TGSI_INTERPOLATE_LOC_CENTER:
- info->uses_persp_center = true;
- break;
- case TGSI_INTERPOLATE_LOC_CENTROID:
- info->uses_persp_centroid = true;
- break;
- case TGSI_INTERPOLATE_LOC_SAMPLE:
- info->uses_persp_sample = true;
- break;
- }
- break;
- case TGSI_INTERPOLATE_LINEAR:
- switch (fulldecl->Interp.Location) {
- case TGSI_INTERPOLATE_LOC_CENTER:
- info->uses_linear_center = true;
- break;
- case TGSI_INTERPOLATE_LOC_CENTROID:
- info->uses_linear_centroid = true;
- break;
- case TGSI_INTERPOLATE_LOC_SAMPLE:
- info->uses_linear_sample = true;
- break;
- }
- break;
- /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. */
- }
- }
-
- if (semName == TGSI_SEMANTIC_PRIMID)
- info->uses_primid = TRUE;
- else if (procType == TGSI_PROCESSOR_FRAGMENT) {
- if (semName == TGSI_SEMANTIC_POSITION)
- info->reads_position = TRUE;
- else if (semName == TGSI_SEMANTIC_FACE)
- info->uses_frontface = TRUE;
- }
- }
- else if (file == TGSI_FILE_SYSTEM_VALUE) {
- unsigned index = fulldecl->Range.First;
-
- info->system_value_semantic_name[index] = semName;
- info->num_system_values = MAX2(info->num_system_values,
- index + 1);
-
- if (semName == TGSI_SEMANTIC_INSTANCEID) {
- info->uses_instanceid = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_VERTEXID) {
- info->uses_vertexid = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_VERTEXID_NOBASE) {
- info->uses_vertexid_nobase = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_BASEVERTEX) {
- info->uses_basevertex = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_PRIMID) {
- info->uses_primid = TRUE;
- } else if (semName == TGSI_SEMANTIC_INVOCATIONID) {
- info->uses_invocationid = TRUE;
- } else if (semName == TGSI_SEMANTIC_POSITION)
- info->reads_position = TRUE;
- else if (semName == TGSI_SEMANTIC_FACE)
- info->uses_frontface = TRUE;
- else if (semName == TGSI_SEMANTIC_SAMPLEMASK)
- info->reads_samplemask = TRUE;
- }
- else if (file == TGSI_FILE_OUTPUT) {
- info->output_semantic_name[reg] = (ubyte) semName;
- info->output_semantic_index[reg] = (ubyte) semIndex;
- info->num_outputs++;
-
- if (semName == TGSI_SEMANTIC_COLOR)
- info->colors_written |= 1 << semIndex;
-
- if (procType == TGSI_PROCESSOR_VERTEX ||
- procType == TGSI_PROCESSOR_GEOMETRY ||
- procType == TGSI_PROCESSOR_TESS_CTRL ||
- procType == TGSI_PROCESSOR_TESS_EVAL) {
- if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) {
- info->writes_viewport_index = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_LAYER) {
- info->writes_layer = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_PSIZE) {
- info->writes_psize = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_CLIPVERTEX) {
- info->writes_clipvertex = TRUE;
- }
- }
-
- if (procType == TGSI_PROCESSOR_FRAGMENT) {
- if (semName == TGSI_SEMANTIC_POSITION) {
- info->writes_z = TRUE;
- }
- else if (semName == TGSI_SEMANTIC_STENCIL) {
- info->writes_stencil = TRUE;
- } else if (semName == TGSI_SEMANTIC_SAMPLEMASK) {
- info->writes_samplemask = TRUE;
- }
- }
-
- if (procType == TGSI_PROCESSOR_VERTEX) {
- if (semName == TGSI_SEMANTIC_EDGEFLAG) {
- info->writes_edgeflag = TRUE;
- }
- }
- } else if (file == TGSI_FILE_SAMPLER) {
- info->samplers_declared |= 1 << reg;
- }
- }
- }
+ scan_declaration(info, &parse.FullToken.FullDeclaration);
break;
-
case TGSI_TOKEN_TYPE_IMMEDIATE:
- {
- uint reg = info->immediate_count++;
- uint file = TGSI_FILE_IMMEDIATE;
-
- info->file_mask[file] |= (1 << reg);
- info->file_count[file]++;
- info->file_max[file] = MAX2(info->file_max[file], (int)reg);
- }
+ scan_immediate(info);
break;
-
case TGSI_TOKEN_TYPE_PROPERTY:
- {
- const struct tgsi_full_property *fullprop
- = &parse.FullToken.FullProperty;
- unsigned name = fullprop->Property.PropertyName;
- unsigned value = fullprop->u[0].Data;
-
- assert(name < Elements(info->properties));
- info->properties[name] = value;
-
- switch (name) {
- case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
- info->num_written_clipdistance = value;
- info->clipdist_writemask |= (1 << value) - 1;
- break;
- case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
- info->num_written_culldistance = value;
- info->culldist_writemask |= (1 << value) - 1;
- break;
- }
- }
+ scan_property(info, &parse.FullToken.FullProperty);
break;
-
default:
- assert( 0 );
+ assert(!"Unexpected TGSI token type");
}
}
@@ -487,7 +508,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
}
}
- tgsi_parse_free (&parse);
+ tgsi_parse_free(&parse);
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h
index 3bd512b6f3e..27e6179c9ee 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h
@@ -192,7 +192,7 @@ tgsi_transform_sampler_view_decl(struct tgsi_transform_context *ctx,
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_SAMPLER_VIEW;
- decl.Declaration.UsageMask = 0xf;
+ decl.Declaration.UsageMask = TGSI_WRITEMASK_XYZW;
decl.Range.First =
decl.Range.Last = index;
decl.SamplerView.Resource = target;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index d6811501d16..9654ac52bf2 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -1593,7 +1593,7 @@ emit_decl_sampler_view(struct ureg_program *ureg,
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
out[0].decl.NrTokens = 3;
out[0].decl.File = TGSI_FILE_SAMPLER_VIEW;
- out[0].decl.UsageMask = 0xf;
+ out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
out[1].value = 0;
out[1].decl_range.First = index;
@@ -1621,7 +1621,7 @@ emit_decl_image(struct ureg_program *ureg,
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
out[0].decl.NrTokens = 3;
out[0].decl.File = TGSI_FILE_IMAGE;
- out[0].decl.UsageMask = 0xf;
+ out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
out[1].value = 0;
out[1].decl_range.First = index;
@@ -1645,7 +1645,7 @@ emit_decl_buffer(struct ureg_program *ureg,
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
out[0].decl.NrTokens = 2;
out[0].decl.File = TGSI_FILE_BUFFER;
- out[0].decl.UsageMask = 0xf;
+ out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
out[0].decl.Atomic = atomic;
out[1].value = 0;
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 2b605594a2e..db6635713e5 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -1,9 +1,9 @@
/**************************************************************************
- *
+ *
* Copyright 2008 VMware, Inc.
* Copyright (c) 2008 VMware, Inc.
* All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@@ -11,11 +11,11 @@
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -23,24 +23,22 @@
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ *
**************************************************************************/
-#include "pipe/p_config.h"
+#include "pipe/p_config.h"
#include "pipe/p_compiler.h"
-#include "util/u_debug.h"
-#include "pipe/p_format.h"
-#include "pipe/p_state.h"
-#include "util/u_inlines.h"
+#include "util/u_debug.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
#include "util/u_format.h"
-#include "util/u_memory.h"
-#include "util/u_string.h"
-#include "util/u_math.h"
-#include "util/u_tile.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+#include "util/u_math.h"
#include "util/u_prim.h"
-#include "util/u_surface.h"
#include <inttypes.h>
#include <stdio.h>
@@ -53,14 +51,15 @@
#endif
-void _debug_vprintf(const char *format, va_list ap)
+void
+_debug_vprintf(const char *format, va_list ap)
{
static char buf[4096] = {'\0'};
#if defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_EMBEDDED)
/* We buffer until we find a newline. */
size_t len = strlen(buf);
int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap);
- if(ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) {
+ if (ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) {
os_log_message(buf);
buf[0] = '\0';
}
@@ -70,12 +69,12 @@ void _debug_vprintf(const char *format, va_list ap)
#endif
}
+
void
-_pipe_debug_message(
- struct pipe_debug_callback *cb,
- unsigned *id,
- enum pipe_debug_type type,
- const char *fmt, ...)
+_pipe_debug_message(struct pipe_debug_callback *cb,
+ unsigned *id,
+ enum pipe_debug_type type,
+ const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
@@ -112,9 +111,8 @@ debug_disable_error_message_boxes(void)
#ifdef DEBUG
-void debug_print_blob( const char *name,
- const void *blob,
- unsigned size )
+void
+debug_print_blob(const char *name, const void *blob, unsigned size)
{
const unsigned *ublob = (const unsigned *)blob;
unsigned i;
@@ -147,6 +145,7 @@ debug_get_option_should_print(void)
return value;
}
+
const char *
debug_get_option(const char *name, const char *dfault)
{
@@ -157,39 +156,42 @@ debug_get_option(const char *name, const char *dfault)
result = dfault;
if (debug_get_option_should_print())
- debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)");
-
+ debug_printf("%s: %s = %s\n", __FUNCTION__, name,
+ result ? result : "(null)");
+
return result;
}
+
boolean
debug_get_bool_option(const char *name, boolean dfault)
{
const char *str = os_get_option(name);
boolean result;
-
- if(str == NULL)
+
+ if (str == NULL)
result = dfault;
- else if(!util_strcmp(str, "n"))
+ else if (!util_strcmp(str, "n"))
result = FALSE;
- else if(!util_strcmp(str, "no"))
+ else if (!util_strcmp(str, "no"))
result = FALSE;
- else if(!util_strcmp(str, "0"))
+ else if (!util_strcmp(str, "0"))
result = FALSE;
- else if(!util_strcmp(str, "f"))
+ else if (!util_strcmp(str, "f"))
result = FALSE;
- else if(!util_strcmp(str, "F"))
+ else if (!util_strcmp(str, "F"))
result = FALSE;
- else if(!util_strcmp(str, "false"))
+ else if (!util_strcmp(str, "false"))
result = FALSE;
- else if(!util_strcmp(str, "FALSE"))
+ else if (!util_strcmp(str, "FALSE"))
result = FALSE;
else
result = TRUE;
if (debug_get_option_should_print())
- debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? "TRUE" : "FALSE");
-
+ debug_printf("%s: %s = %s\n", __FUNCTION__, name,
+ result ? "TRUE" : "FALSE");
+
return result;
}
@@ -199,23 +201,23 @@ debug_get_num_option(const char *name, long dfault)
{
long result;
const char *str;
-
+
str = os_get_option(name);
- if(!str)
+ if (!str)
result = dfault;
else {
long sign;
char c;
c = *str++;
- if(c == '-') {
+ if (c == '-') {
sign = -1;
c = *str++;
- }
+ }
else {
sign = 1;
}
result = 0;
- while('0' <= c && c <= '9') {
+ while ('0' <= c && c <= '9') {
result = result*10 + (c - '0');
c = *str++;
}
@@ -228,7 +230,9 @@ debug_get_num_option(const char *name, long dfault)
return result;
}
-static boolean str_has_option(const char *str, const char *name)
+
+static boolean
+str_has_option(const char *str, const char *name)
{
/* Empty string. */
if (!*str) {
@@ -271,8 +275,9 @@ static boolean str_has_option(const char *str, const char *name)
return FALSE;
}
+
uint64_t
-debug_get_flags_option(const char *name,
+debug_get_flags_option(const char *name,
const struct debug_named_value *flags,
uint64_t dfault)
{
@@ -280,9 +285,9 @@ debug_get_flags_option(const char *name,
const char *str;
const struct debug_named_value *orig = flags;
unsigned namealign = 0;
-
+
str = os_get_option(name);
- if(!str)
+ if (!str)
result = dfault;
else if (!util_strcmp(str, "help")) {
result = dfault;
@@ -296,7 +301,7 @@ debug_get_flags_option(const char *name,
}
else {
result = 0;
- while( flags->name ) {
+ while (flags->name) {
if (str_has_option(str, flags->name))
result |= flags->value;
++flags;
@@ -305,7 +310,8 @@ debug_get_flags_option(const char *name,
if (debug_get_option_should_print()) {
if (str) {
- debug_printf("%s: %s = 0x%"PRIx64" (%s)\n", __FUNCTION__, name, result, str);
+ debug_printf("%s: %s = 0x%"PRIx64" (%s)\n",
+ __FUNCTION__, name, result, str);
} else {
debug_printf("%s: %s = 0x%"PRIx64"\n", __FUNCTION__, name, result);
}
@@ -315,24 +321,24 @@ debug_get_flags_option(const char *name,
}
-void _debug_assert_fail(const char *expr,
- const char *file,
- unsigned line,
- const char *function)
+void
+_debug_assert_fail(const char *expr, const char *file, unsigned line,
+ const char *function)
{
- _debug_printf("%s:%u:%s: Assertion `%s' failed.\n", file, line, function, expr);
+ _debug_printf("%s:%u:%s: Assertion `%s' failed.\n",
+ file, line, function, expr);
os_abort();
}
const char *
-debug_dump_enum(const struct debug_named_value *names,
+debug_dump_enum(const struct debug_named_value *names,
unsigned long value)
{
static char rest[64];
-
- while(names->name) {
- if(names->value == value)
+
+ while (names->name) {
+ if (names->value == value)
return names->name;
++names;
}
@@ -343,14 +349,14 @@ debug_dump_enum(const struct debug_named_value *names,
const char *
-debug_dump_enum_noprefix(const struct debug_named_value *names,
+debug_dump_enum_noprefix(const struct debug_named_value *names,
const char *prefix,
unsigned long value)
{
static char rest[64];
-
- while(names->name) {
- if(names->value == value) {
+
+ while (names->name) {
+ if (names->value == value) {
const char *name = names->name;
while (*name == *prefix) {
name++;
@@ -361,16 +367,13 @@ debug_dump_enum_noprefix(const struct debug_named_value *names,
++names;
}
-
-
util_snprintf(rest, sizeof(rest), "0x%08lx", value);
return rest;
}
const char *
-debug_dump_flags(const struct debug_named_value *names,
- unsigned long value)
+debug_dump_flags(const struct debug_named_value *names, unsigned long value)
{
static char output[4096];
static char rest[256];
@@ -378,8 +381,8 @@ debug_dump_flags(const struct debug_named_value *names,
output[0] = '\0';
- while(names->name) {
- if((names->value & value) == names->value) {
+ while (names->name) {
+ if ((names->value & value) == names->value) {
if (!first)
util_strncat(output, "|", sizeof(output) - strlen(output) - 1);
else
@@ -390,27 +393,28 @@ debug_dump_flags(const struct debug_named_value *names,
}
++names;
}
-
+
if (value) {
if (!first)
util_strncat(output, "|", sizeof(output) - strlen(output) - 1);
else
first = 0;
-
+
util_snprintf(rest, sizeof(rest), "0x%08lx", value);
util_strncat(output, rest, sizeof(output) - strlen(output) - 1);
output[sizeof(output) - 1] = '\0';
}
-
- if(first)
+
+ if (first)
return "0";
-
+
return output;
}
#ifdef DEBUG
-void debug_print_format(const char *msg, unsigned fmt )
+void
+debug_print_format(const char *msg, unsigned fmt )
{
debug_printf("%s: %s\n", msg, util_format_name(fmt));
}
@@ -447,7 +451,8 @@ u_prim_name(unsigned prim)
int fl_indent = 0;
const char* fl_function[1024];
-int debug_funclog_enter(const char* f, const int line, const char* file)
+int
+debug_funclog_enter(const char* f, const int line, const char* file)
{
int i;
@@ -461,14 +466,16 @@ int debug_funclog_enter(const char* f, const int line, const char* file)
return 0;
}
-void debug_funclog_exit(const char* f, const int line, const char* file)
+void
+debug_funclog_exit(const char* f, const int line, const char* file)
{
--fl_indent;
assert(fl_indent >= 0);
assert(fl_function[fl_indent] == f);
}
-void debug_funclog_enter_exit(const char* f, const int line, const char* file)
+void
+debug_funclog_enter_exit(const char* f, const int line, const char* file)
{
int i;
for (i = 0; i < fl_indent; i++)
@@ -481,313 +488,6 @@ void debug_funclog_enter_exit(const char* f, const int line, const char* file)
#ifdef DEBUG
/**
- * Dump an image to .ppm file.
- * \param format PIPE_FORMAT_x
- * \param cpp bytes per pixel
- * \param width width in pixels
- * \param height height in pixels
- * \param stride row stride in bytes
- */
-void debug_dump_image(const char *prefix,
- enum pipe_format format, unsigned cpp,
- unsigned width, unsigned height,
- unsigned stride,
- const void *data)
-{
- /* write a ppm file */
- char filename[256];
- unsigned char *rgb8;
- FILE *f;
-
- util_snprintf(filename, sizeof(filename), "%s.ppm", prefix);
-
- rgb8 = MALLOC(height * width * 3);
- if (!rgb8) {
- return;
- }
-
- util_format_translate(
- PIPE_FORMAT_R8G8B8_UNORM,
- rgb8, width * 3,
- 0, 0,
- format,
- data, stride,
- 0, 0, width, height);
-
- /* Must be opened in binary mode or DOS line ending causes data
- * to be read with one byte offset.
- */
- f = fopen(filename, "wb");
- if (f) {
- fprintf(f, "P6\n");
- fprintf(f, "# ppm-file created by gallium\n");
- fprintf(f, "%i %i\n", width, height);
- fprintf(f, "255\n");
- fwrite(rgb8, 1, height * width * 3, f);
- fclose(f);
- }
- else {
- fprintf(stderr, "Can't open %s for writing\n", filename);
- }
-
- FREE(rgb8);
-}
-
-/* FIXME: dump resources, not surfaces... */
-void debug_dump_surface(struct pipe_context *pipe,
- const char *prefix,
- struct pipe_surface *surface)
-{
- struct pipe_resource *texture;
- struct pipe_transfer *transfer;
- void *data;
-
- if (!surface)
- return;
-
- /* XXX: this doesn't necessarily work, as the driver may be using
- * temporary storage for the surface which hasn't been propagated
- * back into the texture. Need to nail down the semantics of views
- * and transfers a bit better before we can say if extra work needs
- * to be done here:
- */
- texture = surface->texture;
-
- data = pipe_transfer_map(pipe, texture, surface->u.tex.level,
- surface->u.tex.first_layer,
- PIPE_TRANSFER_READ,
- 0, 0, surface->width, surface->height, &transfer);
- if (!data)
- return;
-
- debug_dump_image(prefix,
- texture->format,
- util_format_get_blocksize(texture->format),
- util_format_get_nblocksx(texture->format, surface->width),
- util_format_get_nblocksy(texture->format, surface->height),
- transfer->stride,
- data);
-
- pipe->transfer_unmap(pipe, transfer);
-}
-
-
-void debug_dump_texture(struct pipe_context *pipe,
- const char *prefix,
- struct pipe_resource *texture)
-{
- struct pipe_surface *surface, surf_tmpl;
-
- if (!texture)
- return;
-
- /* XXX for now, just dump image for layer=0, level=0 */
- u_surface_default_template(&surf_tmpl, texture);
- surface = pipe->create_surface(pipe, texture, &surf_tmpl);
- if (surface) {
- debug_dump_surface(pipe, prefix, surface);
- pipe->surface_destroy(pipe, surface);
- }
-}
-
-
-#pragma pack(push,2)
-struct bmp_file_header {
- uint16_t bfType;
- uint32_t bfSize;
- uint16_t bfReserved1;
- uint16_t bfReserved2;
- uint32_t bfOffBits;
-};
-#pragma pack(pop)
-
-struct bmp_info_header {
- uint32_t biSize;
- int32_t biWidth;
- int32_t biHeight;
- uint16_t biPlanes;
- uint16_t biBitCount;
- uint32_t biCompression;
- uint32_t biSizeImage;
- int32_t biXPelsPerMeter;
- int32_t biYPelsPerMeter;
- uint32_t biClrUsed;
- uint32_t biClrImportant;
-};
-
-struct bmp_rgb_quad {
- uint8_t rgbBlue;
- uint8_t rgbGreen;
- uint8_t rgbRed;
- uint8_t rgbAlpha;
-};
-
-void
-debug_dump_surface_bmp(struct pipe_context *pipe,
- const char *filename,
- struct pipe_surface *surface)
-{
- struct pipe_transfer *transfer;
- struct pipe_resource *texture = surface->texture;
- void *ptr;
-
- ptr = pipe_transfer_map(pipe, texture, surface->u.tex.level,
- surface->u.tex.first_layer, PIPE_TRANSFER_READ,
- 0, 0, surface->width, surface->height, &transfer);
-
- debug_dump_transfer_bmp(pipe, filename, transfer, ptr);
-
- pipe->transfer_unmap(pipe, transfer);
-}
-
-void
-debug_dump_transfer_bmp(struct pipe_context *pipe,
- const char *filename,
- struct pipe_transfer *transfer, void *ptr)
-{
- float *rgba;
-
- if (!transfer)
- goto error1;
-
- rgba = MALLOC(transfer->box.width *
- transfer->box.height *
- transfer->box.depth *
- 4*sizeof(float));
- if (!rgba)
- goto error1;
-
- pipe_get_tile_rgba(transfer, ptr, 0, 0,
- transfer->box.width, transfer->box.height,
- rgba);
-
- debug_dump_float_rgba_bmp(filename,
- transfer->box.width, transfer->box.height,
- rgba, transfer->box.width);
-
- FREE(rgba);
-error1:
- ;
-}
-
-void
-debug_dump_float_rgba_bmp(const char *filename,
- unsigned width, unsigned height,
- float *rgba, unsigned stride)
-{
- FILE *stream;
- struct bmp_file_header bmfh;
- struct bmp_info_header bmih;
- unsigned x, y;
-
- if (!rgba)
- goto error1;
-
- bmfh.bfType = 0x4d42;
- bmfh.bfSize = 14 + 40 + height*width*4;
- bmfh.bfReserved1 = 0;
- bmfh.bfReserved2 = 0;
- bmfh.bfOffBits = 14 + 40;
-
- bmih.biSize = 40;
- bmih.biWidth = width;
- bmih.biHeight = height;
- bmih.biPlanes = 1;
- bmih.biBitCount = 32;
- bmih.biCompression = 0;
- bmih.biSizeImage = height*width*4;
- bmih.biXPelsPerMeter = 0;
- bmih.biYPelsPerMeter = 0;
- bmih.biClrUsed = 0;
- bmih.biClrImportant = 0;
-
- stream = fopen(filename, "wb");
- if (!stream)
- goto error1;
-
- fwrite(&bmfh, 14, 1, stream);
- fwrite(&bmih, 40, 1, stream);
-
- y = height;
- while(y--) {
- float *ptr = rgba + (stride * y * 4);
- for(x = 0; x < width; ++x)
- {
- struct bmp_rgb_quad pixel;
- pixel.rgbRed = float_to_ubyte(ptr[x*4 + 0]);
- pixel.rgbGreen = float_to_ubyte(ptr[x*4 + 1]);
- pixel.rgbBlue = float_to_ubyte(ptr[x*4 + 2]);
- pixel.rgbAlpha = float_to_ubyte(ptr[x*4 + 3]);
- fwrite(&pixel, 1, 4, stream);
- }
- }
-
- fclose(stream);
-error1:
- ;
-}
-
-void
-debug_dump_ubyte_rgba_bmp(const char *filename,
- unsigned width, unsigned height,
- const ubyte *rgba, unsigned stride)
-{
- FILE *stream;
- struct bmp_file_header bmfh;
- struct bmp_info_header bmih;
- unsigned x, y;
-
- assert(rgba);
- if(!rgba)
- goto error1;
-
- bmfh.bfType = 0x4d42;
- bmfh.bfSize = 14 + 40 + height*width*4;
- bmfh.bfReserved1 = 0;
- bmfh.bfReserved2 = 0;
- bmfh.bfOffBits = 14 + 40;
-
- bmih.biSize = 40;
- bmih.biWidth = width;
- bmih.biHeight = height;
- bmih.biPlanes = 1;
- bmih.biBitCount = 32;
- bmih.biCompression = 0;
- bmih.biSizeImage = height*width*4;
- bmih.biXPelsPerMeter = 0;
- bmih.biYPelsPerMeter = 0;
- bmih.biClrUsed = 0;
- bmih.biClrImportant = 0;
-
- stream = fopen(filename, "wb");
- assert(stream);
- if(!stream)
- goto error1;
-
- fwrite(&bmfh, 14, 1, stream);
- fwrite(&bmih, 40, 1, stream);
-
- y = height;
- while(y--) {
- const ubyte *ptr = rgba + (stride * y * 4);
- for(x = 0; x < width; ++x)
- {
- struct bmp_rgb_quad pixel;
- pixel.rgbRed = ptr[x*4 + 0];
- pixel.rgbGreen = ptr[x*4 + 1];
- pixel.rgbBlue = ptr[x*4 + 2];
- pixel.rgbAlpha = ptr[x*4 + 3];
- fwrite(&pixel, 1, 4, stream);
- }
- }
-
- fclose(stream);
-error1:
- ;
-}
-
-
-/**
* Print PIPE_TRANSFER_x flags with a message.
*/
void
diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h
index 671bd37a085..c2707b402cb 100644
--- a/src/gallium/auxiliary/util/u_debug.h
+++ b/src/gallium/auxiliary/util/u_debug.h
@@ -464,45 +464,6 @@ void
debug_memory_end(unsigned long beginning);
-#ifdef DEBUG
-struct pipe_context;
-struct pipe_surface;
-struct pipe_transfer;
-struct pipe_resource;
-
-void debug_dump_image(const char *prefix,
- enum pipe_format format, unsigned cpp,
- unsigned width, unsigned height,
- unsigned stride,
- const void *data);
-void debug_dump_surface(struct pipe_context *pipe,
- const char *prefix,
- struct pipe_surface *surface);
-void debug_dump_texture(struct pipe_context *pipe,
- const char *prefix,
- struct pipe_resource *texture);
-void debug_dump_surface_bmp(struct pipe_context *pipe,
- const char *filename,
- struct pipe_surface *surface);
-void debug_dump_transfer_bmp(struct pipe_context *pipe,
- const char *filename,
- struct pipe_transfer *transfer, void *ptr);
-void debug_dump_float_rgba_bmp(const char *filename,
- unsigned width, unsigned height,
- float *rgba, unsigned stride);
-void debug_dump_ubyte_rgba_bmp(const char *filename,
- unsigned width, unsigned height,
- const ubyte *rgba, unsigned stride);
-#else
-#define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0)
-#define debug_dump_surface(pipe, prefix, surface) ((void)0)
-#define debug_dump_surface_bmp(pipe, filename, surface) ((void)0)
-#define debug_dump_transfer_bmp(filename, transfer, ptr) ((void)0)
-#define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
-#define debug_dump_ubyte_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
-#endif
-
-
void
debug_print_transfer_flags(const char *msg, unsigned usage);
diff --git a/src/gallium/auxiliary/util/u_debug_image.c b/src/gallium/auxiliary/util/u_debug_image.c
new file mode 100644
index 00000000000..98d73a63de2
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_image.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2008-2016 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "util/u_debug_image.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+#include "util/u_surface.h"
+#include "util/u_tile.h"
+
+#include <stdio.h>
+
+
+#ifdef DEBUG
+
+/**
+ * Dump an image to .ppm file.
+ * \param format PIPE_FORMAT_x
+ * \param cpp bytes per pixel
+ * \param width width in pixels
+ * \param height height in pixels
+ * \param stride row stride in bytes
+ */
+void
+debug_dump_image(const char *prefix,
+ enum pipe_format format, unsigned cpp,
+ unsigned width, unsigned height,
+ unsigned stride,
+ const void *data)
+{
+ /* write a ppm file */
+ char filename[256];
+ unsigned char *rgb8;
+ FILE *f;
+
+ util_snprintf(filename, sizeof(filename), "%s.ppm", prefix);
+
+ rgb8 = MALLOC(height * width * 3);
+ if (!rgb8) {
+ return;
+ }
+
+ util_format_translate(
+ PIPE_FORMAT_R8G8B8_UNORM,
+ rgb8, width * 3,
+ 0, 0,
+ format,
+ data, stride,
+ 0, 0, width, height);
+
+ /* Must be opened in binary mode or DOS line ending causes data
+ * to be read with one byte offset.
+ */
+ f = fopen(filename, "wb");
+ if (f) {
+ fprintf(f, "P6\n");
+ fprintf(f, "# ppm-file created by gallium\n");
+ fprintf(f, "%i %i\n", width, height);
+ fprintf(f, "255\n");
+ fwrite(rgb8, 1, height * width * 3, f);
+ fclose(f);
+ }
+ else {
+ fprintf(stderr, "Can't open %s for writing\n", filename);
+ }
+
+ FREE(rgb8);
+}
+
+
+/* FIXME: dump resources, not surfaces... */
+void
+debug_dump_surface(struct pipe_context *pipe,
+ const char *prefix,
+ struct pipe_surface *surface)
+{
+ struct pipe_resource *texture;
+ struct pipe_transfer *transfer;
+ void *data;
+
+ if (!surface)
+ return;
+
+ /* XXX: this doesn't necessarily work, as the driver may be using
+ * temporary storage for the surface which hasn't been propagated
+ * back into the texture. Need to nail down the semantics of views
+ * and transfers a bit better before we can say if extra work needs
+ * to be done here:
+ */
+ texture = surface->texture;
+
+ data = pipe_transfer_map(pipe, texture, surface->u.tex.level,
+ surface->u.tex.first_layer,
+ PIPE_TRANSFER_READ,
+ 0, 0, surface->width, surface->height, &transfer);
+ if (!data)
+ return;
+
+ debug_dump_image(prefix,
+ texture->format,
+ util_format_get_blocksize(texture->format),
+ util_format_get_nblocksx(texture->format, surface->width),
+ util_format_get_nblocksy(texture->format, surface->height),
+ transfer->stride,
+ data);
+
+ pipe->transfer_unmap(pipe, transfer);
+}
+
+
+void
+debug_dump_texture(struct pipe_context *pipe,
+ const char *prefix,
+ struct pipe_resource *texture)
+{
+ struct pipe_surface *surface, surf_tmpl;
+
+ if (!texture)
+ return;
+
+ /* XXX for now, just dump image for layer=0, level=0 */
+ u_surface_default_template(&surf_tmpl, texture);
+ surface = pipe->create_surface(pipe, texture, &surf_tmpl);
+ if (surface) {
+ debug_dump_surface(pipe, prefix, surface);
+ pipe->surface_destroy(pipe, surface);
+ }
+}
+
+
+#pragma pack(push,2)
+struct bmp_file_header {
+ uint16_t bfType;
+ uint32_t bfSize;
+ uint16_t bfReserved1;
+ uint16_t bfReserved2;
+ uint32_t bfOffBits;
+};
+#pragma pack(pop)
+
+struct bmp_info_header {
+ uint32_t biSize;
+ int32_t biWidth;
+ int32_t biHeight;
+ uint16_t biPlanes;
+ uint16_t biBitCount;
+ uint32_t biCompression;
+ uint32_t biSizeImage;
+ int32_t biXPelsPerMeter;
+ int32_t biYPelsPerMeter;
+ uint32_t biClrUsed;
+ uint32_t biClrImportant;
+};
+
+struct bmp_rgb_quad {
+ uint8_t rgbBlue;
+ uint8_t rgbGreen;
+ uint8_t rgbRed;
+ uint8_t rgbAlpha;
+};
+
+void
+debug_dump_surface_bmp(struct pipe_context *pipe,
+ const char *filename,
+ struct pipe_surface *surface)
+{
+ struct pipe_transfer *transfer;
+ struct pipe_resource *texture = surface->texture;
+ void *ptr;
+
+ ptr = pipe_transfer_map(pipe, texture, surface->u.tex.level,
+ surface->u.tex.first_layer, PIPE_TRANSFER_READ,
+ 0, 0, surface->width, surface->height, &transfer);
+
+ debug_dump_transfer_bmp(pipe, filename, transfer, ptr);
+
+ pipe->transfer_unmap(pipe, transfer);
+}
+
+void
+debug_dump_transfer_bmp(struct pipe_context *pipe,
+ const char *filename,
+ struct pipe_transfer *transfer, void *ptr)
+{
+ float *rgba;
+
+ if (!transfer)
+ goto error1;
+
+ rgba = MALLOC(transfer->box.width *
+ transfer->box.height *
+ transfer->box.depth *
+ 4*sizeof(float));
+ if (!rgba)
+ goto error1;
+
+ pipe_get_tile_rgba(transfer, ptr, 0, 0,
+ transfer->box.width, transfer->box.height,
+ rgba);
+
+ debug_dump_float_rgba_bmp(filename,
+ transfer->box.width, transfer->box.height,
+ rgba, transfer->box.width);
+
+ FREE(rgba);
+error1:
+ ;
+}
+
+void
+debug_dump_float_rgba_bmp(const char *filename,
+ unsigned width, unsigned height,
+ float *rgba, unsigned stride)
+{
+ FILE *stream;
+ struct bmp_file_header bmfh;
+ struct bmp_info_header bmih;
+ unsigned x, y;
+
+ if (!rgba)
+ goto error1;
+
+ bmfh.bfType = 0x4d42;
+ bmfh.bfSize = 14 + 40 + height*width*4;
+ bmfh.bfReserved1 = 0;
+ bmfh.bfReserved2 = 0;
+ bmfh.bfOffBits = 14 + 40;
+
+ bmih.biSize = 40;
+ bmih.biWidth = width;
+ bmih.biHeight = height;
+ bmih.biPlanes = 1;
+ bmih.biBitCount = 32;
+ bmih.biCompression = 0;
+ bmih.biSizeImage = height*width*4;
+ bmih.biXPelsPerMeter = 0;
+ bmih.biYPelsPerMeter = 0;
+ bmih.biClrUsed = 0;
+ bmih.biClrImportant = 0;
+
+ stream = fopen(filename, "wb");
+ if (!stream)
+ goto error1;
+
+ fwrite(&bmfh, 14, 1, stream);
+ fwrite(&bmih, 40, 1, stream);
+
+ y = height;
+ while (y--) {
+ float *ptr = rgba + (stride * y * 4);
+ for (x = 0; x < width; ++x) {
+ struct bmp_rgb_quad pixel;
+ pixel.rgbRed = float_to_ubyte(ptr[x*4 + 0]);
+ pixel.rgbGreen = float_to_ubyte(ptr[x*4 + 1]);
+ pixel.rgbBlue = float_to_ubyte(ptr[x*4 + 2]);
+ pixel.rgbAlpha = float_to_ubyte(ptr[x*4 + 3]);
+ fwrite(&pixel, 1, 4, stream);
+ }
+ }
+
+ fclose(stream);
+error1:
+ ;
+}
+
+void
+debug_dump_ubyte_rgba_bmp(const char *filename,
+ unsigned width, unsigned height,
+ const ubyte *rgba, unsigned stride)
+{
+ FILE *stream;
+ struct bmp_file_header bmfh;
+ struct bmp_info_header bmih;
+ unsigned x, y;
+
+ assert(rgba);
+ if (!rgba)
+ goto error1;
+
+ bmfh.bfType = 0x4d42;
+ bmfh.bfSize = 14 + 40 + height*width*4;
+ bmfh.bfReserved1 = 0;
+ bmfh.bfReserved2 = 0;
+ bmfh.bfOffBits = 14 + 40;
+
+ bmih.biSize = 40;
+ bmih.biWidth = width;
+ bmih.biHeight = height;
+ bmih.biPlanes = 1;
+ bmih.biBitCount = 32;
+ bmih.biCompression = 0;
+ bmih.biSizeImage = height*width*4;
+ bmih.biXPelsPerMeter = 0;
+ bmih.biYPelsPerMeter = 0;
+ bmih.biClrUsed = 0;
+ bmih.biClrImportant = 0;
+
+ stream = fopen(filename, "wb");
+ assert(stream);
+ if (!stream)
+ goto error1;
+
+ fwrite(&bmfh, 14, 1, stream);
+ fwrite(&bmih, 40, 1, stream);
+
+ y = height;
+ while (y--) {
+ const ubyte *ptr = rgba + (stride * y * 4);
+ for (x = 0; x < width; ++x) {
+ struct bmp_rgb_quad pixel;
+ pixel.rgbRed = ptr[x*4 + 0];
+ pixel.rgbGreen = ptr[x*4 + 1];
+ pixel.rgbBlue = ptr[x*4 + 2];
+ pixel.rgbAlpha = ptr[x*4 + 3];
+ fwrite(&pixel, 1, 4, stream);
+ }
+ }
+
+ fclose(stream);
+error1:
+ ;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_debug_image.h b/src/gallium/auxiliary/util/u_debug_image.h
new file mode 100644
index 00000000000..f190eec5f52
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_image.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2008-2016 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef U_DEBUG_IMAGE_H
+#define U_DEBUG_IMAGE_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+
+
+#ifdef DEBUG
+struct pipe_context;
+struct pipe_surface;
+struct pipe_transfer;
+struct pipe_resource;
+
+void debug_dump_image(const char *prefix,
+ enum pipe_format format, unsigned cpp,
+ unsigned width, unsigned height,
+ unsigned stride,
+ const void *data);
+void debug_dump_surface(struct pipe_context *pipe,
+ const char *prefix,
+ struct pipe_surface *surface);
+void debug_dump_texture(struct pipe_context *pipe,
+ const char *prefix,
+ struct pipe_resource *texture);
+void debug_dump_surface_bmp(struct pipe_context *pipe,
+ const char *filename,
+ struct pipe_surface *surface);
+void debug_dump_transfer_bmp(struct pipe_context *pipe,
+ const char *filename,
+ struct pipe_transfer *transfer, void *ptr);
+void debug_dump_float_rgba_bmp(const char *filename,
+ unsigned width, unsigned height,
+ float *rgba, unsigned stride);
+void debug_dump_ubyte_rgba_bmp(const char *filename,
+ unsigned width, unsigned height,
+ const ubyte *rgba, unsigned stride);
+#else
+#define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0)
+#define debug_dump_surface(pipe, prefix, surface) ((void)0)
+#define debug_dump_surface_bmp(pipe, filename, surface) ((void)0)
+#define debug_dump_transfer_bmp(filename, transfer, ptr) ((void)0)
+#define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
+#define debug_dump_ubyte_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
+#endif
+
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_debug_stack.c b/src/gallium/auxiliary/util/u_debug_stack.c
index 68961d3510e..1faa1903a76 100644
--- a/src/gallium/auxiliary/util/u_debug_stack.c
+++ b/src/gallium/auxiliary/util/u_debug_stack.c
@@ -2,7 +2,7 @@
*
* Copyright 2009 VMware, Inc.
* All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,13 +22,13 @@
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ *
**************************************************************************/
/**
* @file
* Stack backtracing.
- *
+ *
* @author Jose Fonseca <[email protected]>
*/
@@ -44,12 +44,13 @@
/**
* Capture stack backtrace.
*
- * NOTE: The implementation of this function is quite big, but it is important not to
- * break it down in smaller functions to avoid adding new frames to the calling stack.
+ * NOTE: The implementation of this function is quite big, but it is important
+ * not to break it down in smaller functions to avoid adding new frames to the
+ * calling stack.
*/
void
debug_backtrace_capture(struct debug_stack_frame *backtrace,
- unsigned start_frame,
+ unsigned start_frame,
unsigned nr_frames)
{
const void **frame_pointer = NULL;
@@ -66,7 +67,8 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace,
*/
#if defined(PIPE_OS_WINDOWS)
{
- typedef USHORT (WINAPI *PFNCAPTURESTACKBACKTRACE)(ULONG, ULONG, PVOID *, PULONG);
+ typedef USHORT (WINAPI *PFNCAPTURESTACKBACKTRACE)(ULONG, ULONG,
+ PVOID *, PULONG);
static PFNCAPTURESTACKBACKTRACE pfnCaptureStackBackTrace = NULL;
if (!pfnCaptureStackBackTrace) {
@@ -76,8 +78,9 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace,
assert(hModule);
}
if (hModule) {
- pfnCaptureStackBackTrace = (PFNCAPTURESTACKBACKTRACE)GetProcAddress(hModule,
- "RtlCaptureStackBackTrace");
+ pfnCaptureStackBackTrace =
+ (PFNCAPTURESTACKBACKTRACE)GetProcAddress(hModule,
+ "RtlCaptureStackBackTrace");
}
}
if (pfnCaptureStackBackTrace) {
@@ -88,7 +91,8 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace,
start_frame += 1;
assert(start_frame + nr_frames < 63);
- i = pfnCaptureStackBackTrace(start_frame, nr_frames, (PVOID *) &backtrace->function, NULL);
+ i = pfnCaptureStackBackTrace(start_frame, nr_frames,
+ (PVOID *) &backtrace->function, NULL);
/* Pad remaing requested frames with NULL */
while (i < nr_frames) {
@@ -110,50 +114,49 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace,
#else
frame_pointer = NULL;
#endif
-
-
+
#ifdef PIPE_ARCH_X86
- while(nr_frames) {
+ while (nr_frames) {
const void **next_frame_pointer;
- if(!frame_pointer)
+ if (!frame_pointer)
break;
-
- if(start_frame)
+
+ if (start_frame)
--start_frame;
else {
backtrace[i++].function = frame_pointer[1];
--nr_frames;
}
-
+
next_frame_pointer = (const void **)frame_pointer[0];
-
+
/* Limit the stack walk to avoid referencing undefined memory */
- if((uintptr_t)next_frame_pointer <= (uintptr_t)frame_pointer ||
- (uintptr_t)next_frame_pointer > (uintptr_t)frame_pointer + 64*1024)
+ if ((uintptr_t)next_frame_pointer <= (uintptr_t)frame_pointer ||
+ (uintptr_t)next_frame_pointer > (uintptr_t)frame_pointer + 64*1024)
break;
-
+
frame_pointer = next_frame_pointer;
}
#else
(void) frame_pointer;
#endif
- while(nr_frames) {
+ while (nr_frames) {
backtrace[i++].function = NULL;
--nr_frames;
}
}
-
+
void
-debug_backtrace_dump(const struct debug_stack_frame *backtrace,
+debug_backtrace_dump(const struct debug_stack_frame *backtrace,
unsigned nr_frames)
{
unsigned i;
-
- for(i = 0; i < nr_frames; ++i) {
- if(!backtrace[i].function)
+
+ for (i = 0; i < nr_frames; ++i) {
+ if (!backtrace[i].function)
break;
debug_symbol_print(backtrace[i].function);
}
diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c
index 3428172203b..74e6f99da67 100644
--- a/src/gallium/auxiliary/util/u_pstipple.c
+++ b/src/gallium/auxiliary/util/u_pstipple.c
@@ -58,7 +58,7 @@
#define NUM_NEW_TOKENS 53
-static void
+void
util_pstipple_update_stipple_texture(struct pipe_context *pipe,
struct pipe_resource *tex,
const uint32_t pattern[32])
@@ -118,7 +118,7 @@ util_pstipple_create_stipple_texture(struct pipe_context *pipe,
tex = screen->resource_create(screen, &templat);
- if (tex)
+ if (tex && pattern)
util_pstipple_update_stipple_texture(pipe, tex, pattern);
return tex;
diff --git a/src/gallium/auxiliary/util/u_pstipple.h b/src/gallium/auxiliary/util/u_pstipple.h
index ef8396f4318..d1662be2839 100644
--- a/src/gallium/auxiliary/util/u_pstipple.h
+++ b/src/gallium/auxiliary/util/u_pstipple.h
@@ -36,6 +36,11 @@ struct pipe_resource;
struct pipe_shader_state;
+extern void
+util_pstipple_update_stipple_texture(struct pipe_context *pipe,
+ struct pipe_resource *tex,
+ const uint32_t pattern[32]);
+
extern struct pipe_resource *
util_pstipple_create_stipple_texture(struct pipe_context *pipe,
const uint32_t pattern[32]);
diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c
index b569c8f9907..caef2a8245c 100644
--- a/src/gallium/auxiliary/util/u_staging.c
+++ b/src/gallium/auxiliary/util/u_staging.c
@@ -29,11 +29,14 @@
#include "util/u_memory.h"
#include "util/u_inlines.h"
+
static void
-util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigned height, unsigned depth, struct pipe_resource *template)
+util_staging_resource_template(struct pipe_resource *pt, unsigned width,
+ unsigned height, unsigned depth,
+ struct pipe_resource *template)
{
memset(template, 0, sizeof(struct pipe_resource));
- if(pt->target != PIPE_BUFFER && depth <= 1)
+ if (pt->target != PIPE_BUFFER && depth <= 1)
template->target = PIPE_TEXTURE_RECT;
else
template->target = pt->target;
@@ -49,16 +52,15 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne
template->flags = 0;
}
+
struct util_staging_transfer *
util_staging_transfer_init(struct pipe_context *pipe,
- struct pipe_resource *pt,
- unsigned level,
- unsigned usage,
- const struct pipe_box *box,
- boolean direct, struct util_staging_transfer *tx)
+ struct pipe_resource *pt,
+ unsigned level, unsigned usage,
+ const struct pipe_box *box,
+ boolean direct, struct util_staging_transfer *tx)
{
struct pipe_screen *pscreen = pipe->screen;
-
struct pipe_resource staging_resource_template;
pipe_resource_reference(&tx->base.resource, pt);
@@ -66,23 +68,22 @@ util_staging_transfer_init(struct pipe_context *pipe,
tx->base.usage = usage;
tx->base.box = *box;
- if (direct)
- {
+ if (direct) {
tx->staging_resource = pt;
return tx;
}
- util_staging_resource_template(pt, box->width, box->height, box->depth, &staging_resource_template);
- tx->staging_resource = pscreen->resource_create(pscreen, &staging_resource_template);
- if (!tx->staging_resource)
- {
+ util_staging_resource_template(pt, box->width, box->height,
+ box->depth, &staging_resource_template);
+ tx->staging_resource = pscreen->resource_create(pscreen,
+ &staging_resource_template);
+ if (!tx->staging_resource) {
pipe_resource_reference(&tx->base.resource, NULL);
FREE(tx);
return NULL;
}
- if (usage & PIPE_TRANSFER_READ)
- {
+ if (usage & PIPE_TRANSFER_READ) {
/* XXX this looks wrong dst is always the same but looping over src z? */
int zi;
struct pipe_box sbox;
@@ -92,7 +93,7 @@ util_staging_transfer_init(struct pipe_context *pipe,
sbox.width = box->width;
sbox.height = box->height;
sbox.depth = 1;
- for(zi = 0; zi < box->depth; ++zi) {
+ for (zi = 0; zi < box->depth; ++zi) {
sbox.z = sbox.z + zi;
pipe->resource_copy_region(pipe, tx->staging_resource, 0, 0, 0, 0,
tx->base.resource, level, &sbox);
@@ -102,14 +103,15 @@ util_staging_transfer_init(struct pipe_context *pipe,
return tx;
}
+
void
-util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx)
+util_staging_transfer_destroy(struct pipe_context *pipe,
+ struct pipe_transfer *ptx)
{
struct util_staging_transfer *tx = (struct util_staging_transfer *)ptx;
- if (tx->staging_resource != tx->base.resource)
- {
- if(tx->base.usage & PIPE_TRANSFER_WRITE) {
+ if (tx->staging_resource != tx->base.resource) {
+ if (tx->base.usage & PIPE_TRANSFER_WRITE) {
/* XXX this looks wrong src is always the same but looping over dst z? */
int zi;
struct pipe_box sbox;
@@ -119,8 +121,10 @@ util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *p
sbox.width = tx->base.box.width;
sbox.height = tx->base.box.height;
sbox.depth = 1;
- for(zi = 0; zi < tx->base.box.depth; ++zi)
- pipe->resource_copy_region(pipe, tx->base.resource, tx->base.level, tx->base.box.x, tx->base.box.y, tx->base.box.z + zi,
+ for (zi = 0; zi < tx->base.box.depth; ++zi)
+ pipe->resource_copy_region(pipe, tx->base.resource, tx->base.level,
+ tx->base.box.x, tx->base.box.y,
+ tx->base.box.z + zi,
tx->staging_resource, 0, &sbox);
}
diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h
index ddbb33443e4..6c468aad161 100644
--- a/src/gallium/auxiliary/util/u_staging.h
+++ b/src/gallium/auxiliary/util/u_staging.h
@@ -42,22 +42,26 @@
struct util_staging_transfer {
struct pipe_transfer base;
- /* if direct, same as base.resource, otherwise the temporary staging resource */
+ /* if direct, same as base.resource, otherwise the temporary staging
+ * resource
+ */
struct pipe_resource *staging_resource;
};
-/* user must be stride, slice_stride and offset */
-/* pt->usage == PIPE_USAGE_DYNAMIC || pt->usage == PIPE_USAGE_STAGING should be a good value to pass for direct */
-/* staging resource is currently created with PIPE_USAGE_STAGING */
+/* user must be stride, slice_stride and offset.
+ * pt->usage == PIPE_USAGE_DYNAMIC || pt->usage == PIPE_USAGE_STAGING
+ * should be a good value to pass for direct staging resource is currently
+ * created with PIPE_USAGE_STAGING
+ */
struct util_staging_transfer *
util_staging_transfer_init(struct pipe_context *pipe,
- struct pipe_resource *pt,
- unsigned level,
- unsigned usage,
- const struct pipe_box *box,
- boolean direct, struct util_staging_transfer *tx);
+ struct pipe_resource *pt,
+ unsigned level, unsigned usage,
+ const struct pipe_box *box,
+ boolean direct, struct util_staging_transfer *tx);
void
-util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx);
+util_staging_transfer_destroy(struct pipe_context *pipe,
+ struct pipe_transfer *ptx);
#endif
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 6eb6a2d52ef..f38dc8643b4 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1004,7 +1004,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
nir_const_value *const_offset;
/* UBO addresses are the first driver params: */
unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0);
- int off = intr->const_index[0];
+ int off = 0;
/* First src is ubo index, which could either be an immed or not: */
src0 = get_src(ctx, &intr->src[0])[0];
@@ -1092,7 +1092,7 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
struct ir3_array *arr = get_var(ctx, dvar->var);
struct ir3_instruction *addr, **src;
- unsigned wrmask = intr->const_index[0];
+ unsigned wrmask = nir_intrinsic_write_mask(intr);
compile_assert(ctx, dvar->deref.child &&
(dvar->deref.child->deref_type == nir_deref_type_array));
@@ -1145,8 +1145,8 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
struct ir3_instruction **dst, **src;
struct ir3_block *b = ctx->block;
- int idx = intr->const_index[0];
nir_const_value *const_offset;
+ int idx;
if (info->has_dest) {
dst = get_dst(ctx, &intr->dest, intr->num_components);
@@ -1156,6 +1156,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
switch (intr->intrinsic) {
case nir_intrinsic_load_uniform:
+ idx = nir_intrinsic_base(intr);
const_offset = nir_src_as_const_value(intr->src[0]);
if (const_offset) {
idx += const_offset->u[0];
@@ -1182,6 +1183,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
emit_intrinsic_load_ubo(ctx, intr, dst);
break;
case nir_intrinsic_load_input:
+ idx = nir_intrinsic_base(intr);
const_offset = nir_src_as_const_value(intr->src[0]);
if (const_offset) {
idx += const_offset->u[0];
@@ -1208,6 +1210,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
emit_intrinsic_store_var(ctx, intr);
break;
case nir_intrinsic_store_output:
+ idx = nir_intrinsic_base(intr);
const_offset = nir_src_as_const_value(intr->src[1]);
compile_assert(ctx, const_offset != NULL);
idx += const_offset->u[0];
@@ -1243,6 +1246,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
dst[0] = ctx->instance_id;
break;
case nir_intrinsic_load_user_clip_plane:
+ idx = nir_intrinsic_ucp_id(intr);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
diff --git a/src/gallium/drivers/ilo/ilo_draw.c b/src/gallium/drivers/ilo/ilo_draw.c
index 69f36ae5df6..6831d2c4eff 100644
--- a/src/gallium/drivers/ilo/ilo_draw.c
+++ b/src/gallium/drivers/ilo/ilo_draw.c
@@ -71,6 +71,7 @@ query_process_bo(const struct ilo_context *ilo, struct ilo_query *q)
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_TIME_ELAPSED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
@@ -157,6 +158,7 @@ ilo_init_draw_query(struct ilo_context *ilo, struct ilo_query *q)
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_TIME_ELAPSED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
diff --git a/src/gallium/drivers/ilo/ilo_query.c b/src/gallium/drivers/ilo/ilo_query.c
index 27d08128ab0..106bd42a335 100644
--- a/src/gallium/drivers/ilo/ilo_query.c
+++ b/src/gallium/drivers/ilo/ilo_query.c
@@ -47,7 +47,7 @@ static const struct {
#define INFOX(prefix) { NULL, NULL, NULL, NULL, }
[PIPE_QUERY_OCCLUSION_COUNTER] = INFO(draw),
- [PIPE_QUERY_OCCLUSION_PREDICATE] = INFOX(draw),
+ [PIPE_QUERY_OCCLUSION_PREDICATE] = INFO(draw),
[PIPE_QUERY_TIMESTAMP] = INFO(draw),
[PIPE_QUERY_TIMESTAMP_DISJOINT] = INFOX(draw),
[PIPE_QUERY_TIME_ELAPSED] = INFO(draw),
@@ -75,6 +75,7 @@ ilo_create_query(struct pipe_context *pipe, unsigned query_type, unsigned index)
switch (query_type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIME_ELAPSED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -163,6 +164,12 @@ query_serialize(const struct ilo_query *q, void *buf)
dst[0] = q->result.u64;
}
break;
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ {
+ uint64_t *dst = buf;
+ dst[0] = !!q->result.u64;
+ }
+ break;
case PIPE_QUERY_PIPELINE_STATISTICS:
{
const struct pipe_query_data_pipeline_statistics *stats =
diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c
index 8bc04df4fab..9a47ca80505 100644
--- a/src/gallium/drivers/ilo/ilo_render.c
+++ b/src/gallium/drivers/ilo/ilo_render.c
@@ -202,6 +202,7 @@ ilo_render_get_query_len(const struct ilo_render *render,
switch (query_type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIME_ELAPSED:
/* no reg */
@@ -268,6 +269,7 @@ ilo_render_emit_query(struct ilo_render *render,
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
pipe_control_dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT;
break;
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
index 268aab26c40..241c2ccafb7 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.c
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -32,6 +32,7 @@
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
+#include "util/u_debug_image.h"
#include "util/u_string.h"
#include "draw/draw_context.h"
#include "lp_flush.h"
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_query.c b/src/gallium/drivers/nouveau/nv30/nv30_query.c
index 3980be9579a..75a4b0446fe 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_query.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_query.c
@@ -120,6 +120,7 @@ nv30_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
q->report = 1;
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
q->enable = NV30_3D_QUERY_ENABLE;
q->report = 1;
break;
@@ -203,7 +204,6 @@ nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
struct nv30_query *q = nv30_query(pq);
volatile uint32_t *ntfy0 = nv30_ntfy(screen, q->qo[0]);
volatile uint32_t *ntfy1 = nv30_ntfy(screen, q->qo[1]);
- uint64_t *res64 = &result->u64;
if (ntfy1) {
while (ntfy1[3] & 0xff000000) {
@@ -227,7 +227,10 @@ nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
nv30_query_object_del(screen, &q->qo[1]);
}
- *res64 = q->result;
+ if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE)
+ result->b = !!q->result;
+ else
+ result->u64 = q->result;
return true;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index cccd3b71672..727b509372d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -156,6 +156,7 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
hq->nesting = nv50->screen->num_occlusion_queries_active++;
if (hq->nesting) {
nv50_hw_query_get(push, q, 0x10, 0x0100f002);
@@ -213,6 +214,7 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
nv50_hw_query_get(push, q, 0, 0x0100f002);
if (--nv50->screen->num_occlusion_queries_active == 0) {
PUSH_SPACE(push, 2);
@@ -304,6 +306,9 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
res64[0] = hq->data[1] - hq->data[5];
break;
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ res8[0] = hq->data[1] != hq->data[5];
+ break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
res64[0] = data64[0] - data64[2];
@@ -372,6 +377,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
hq->rotate = 32;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 2cf08897a8d..d92e691fdb8 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -961,8 +961,8 @@ struct pipe_resource *r600_compute_global_buffer_create(
templ->array_size);
result->base.b.vtbl = &r600_global_buffer_vtbl;
- result->base.b.b.screen = screen;
result->base.b.b = *templ;
+ result->base.b.b.screen = screen;
pipe_reference_init(&result->base.b.b.reference, 1);
size_in_dw = (templ->width0+3) / 4;
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 3d0987624a6..474154e52ff 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -55,6 +55,14 @@ enum radeon_llvm_shader_type {
RADEON_LLVM_SHADER_CS = 3,
};
+void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
+{
+ char str[16];
+
+ snprintf(str, sizeof(str), "%i", value);
+ LLVMAddTargetDependentFunctionAttr(F, name, str);
+}
+
/**
* Set the shader type we want to compile
*
@@ -62,7 +70,6 @@ enum radeon_llvm_shader_type {
*/
void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
{
- char Str[2];
enum radeon_llvm_shader_type llvm_type;
switch (type) {
@@ -84,9 +91,7 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
assert(0);
}
- sprintf(Str, "%1d", llvm_type);
-
- LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str);
+ radeon_llvm_add_attribute(F, "ShaderType", llvm_type);
}
static void init_r600_target()
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.h b/src/gallium/drivers/radeon/radeon_llvm_emit.h
index 45f05a9e0e1..84dbd2584a1 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.h
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.h
@@ -34,6 +34,7 @@
struct pipe_debug_callback;
struct radeon_shader_binary;
+void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value);
void radeon_llvm_shader_type(LLVMValueRef F, unsigned type);
LLVMTargetRef radeon_llvm_get_r600_target(const char *triple);
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 825fbb181ba..4d27e86b414 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -124,7 +124,8 @@ static void *si_create_compute_state(
code, header->num_bytes);
si_compile_llvm(sctx->screen, &program->kernels[i].binary,
&program->kernels[i].config, sctx->tm,
- mod, &sctx->b.debug, TGSI_PROCESSOR_COMPUTE);
+ mod, &sctx->b.debug, TGSI_PROCESSOR_COMPUTE,
+ "Compute Shader");
si_shader_dump(sctx->screen, &program->kernels[i],
&sctx->b.debug, TGSI_PROCESSOR_COMPUTE);
si_shader_binary_upload(sctx->screen, &program->kernels[i]);
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index d60c4515625..b5a4034cc12 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -182,7 +182,6 @@ void si_begin_new_cs(struct si_context *ctx)
si_mark_atom_dirty(ctx, &ctx->db_render_state);
si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
si_mark_atom_dirty(ctx, &ctx->spi_map);
- si_mark_atom_dirty(ctx, &ctx->spi_ps_input);
si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
si_all_descriptors_begin_new_cs(ctx);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 48947442757..3c963db5078 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -202,7 +202,6 @@ struct si_context {
struct si_viewports viewports;
struct si_stencil_ref stencil_ref;
struct r600_atom spi_map;
- struct r600_atom spi_ps_input;
/* Precomputed states. */
struct si_pm4_state *init_config;
@@ -222,7 +221,6 @@ struct si_context {
struct si_vertex_element *vertex_elements;
unsigned sprite_coord_enable;
bool flatshade;
- bool force_persample_interp;
/* shader descriptors */
struct si_descriptors vertex_buffers;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d9ed6b234e0..c1d3edc7143 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -833,14 +833,11 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location)
}
/* This shouldn't be used by explicit INTERP opcodes. */
-static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx,
- unsigned param)
+static unsigned select_interp_param(struct si_shader_context *si_shader_ctx,
+ unsigned param)
{
- struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
- unsigned sample_param = 0;
- LLVMValueRef default_ij, sample_ij, force_sample;
-
- default_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, param);
+ if (!si_shader_ctx->shader->key.ps.force_persample_interp)
+ return param;
/* If the shader doesn't use center/centroid, just return the parameter.
*
@@ -850,79 +847,52 @@ static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx,
switch (param) {
case SI_PARAM_PERSP_CENTROID:
case SI_PARAM_PERSP_CENTER:
- if (!si_shader_ctx->shader->selector->forces_persample_interp_for_persp)
- return default_ij;
-
- sample_param = SI_PARAM_PERSP_SAMPLE;
- break;
+ return SI_PARAM_PERSP_SAMPLE;
case SI_PARAM_LINEAR_CENTROID:
case SI_PARAM_LINEAR_CENTER:
- if (!si_shader_ctx->shader->selector->forces_persample_interp_for_linear)
- return default_ij;
-
- sample_param = SI_PARAM_LINEAR_SAMPLE;
- break;
+ return SI_PARAM_LINEAR_SAMPLE;
default:
- return default_ij;
+ return param;
}
-
- /* Otherwise, we have to select (i,j) based on a user data SGPR. */
- sample_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, sample_param);
-
- /* TODO: this can be done more efficiently by switching between
- * 2 prologs.
- */
- force_sample = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
- SI_PARAM_PS_STATE_BITS);
- force_sample = LLVMBuildTrunc(gallivm->builder, force_sample,
- LLVMInt1TypeInContext(gallivm->context), "");
- return LLVMBuildSelect(gallivm->builder, force_sample,
- sample_ij, default_ij, "");
}
-static void declare_input_fs(
- struct radeon_llvm_context *radeon_bld,
- unsigned input_index,
- const struct tgsi_full_declaration *decl)
+/**
+ * Interpolate a fragment shader input.
+ *
+ * @param si_shader_ctx context
+ * @param input_index index of the input in hardware
+ * @param semantic_name TGSI_SEMANTIC_*
+ * @param semantic_index semantic index
+ * @param num_interp_inputs number of all interpolated inputs (= BCOLOR offset)
+ * @param colors_read_mask color components read (4 bits for each color, 8 bits in total)
+ * @param interp_param interpolation weights (i,j)
+ * @param prim_mask SI_PARAM_PRIM_MASK
+ * @param face SI_PARAM_FRONT_FACE
+ * @param result the return value (4 components)
+ */
+static void interp_fs_input(struct si_shader_context *si_shader_ctx,
+ unsigned input_index,
+ unsigned semantic_name,
+ unsigned semantic_index,
+ unsigned num_interp_inputs,
+ unsigned colors_read_mask,
+ LLVMValueRef interp_param,
+ LLVMValueRef prim_mask,
+ LLVMValueRef face,
+ LLVMValueRef result[4])
{
- struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
- struct si_shader_context *si_shader_ctx =
- si_shader_context(&radeon_bld->soa.bld_base);
- struct si_shader *shader = si_shader_ctx->shader;
- struct lp_build_context *uint = &radeon_bld->soa.bld_base.uint_bld;
+ struct lp_build_context *base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
+ struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
struct gallivm_state *gallivm = base->gallivm;
LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
- LLVMValueRef main_fn = radeon_bld->main_fn;
-
- LLVMValueRef interp_param = NULL;
- int interp_param_idx;
const char * intr_name;
-
- /* This value is:
- * [15:0] NewPrimMask (Bit mask for each quad. It is set it the
- * quad begins a new primitive. Bit 0 always needs
- * to be unset)
- * [32:16] ParamOffset
- *
- */
- LLVMValueRef params = LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK);
LLVMValueRef attr_number;
unsigned chan;
- shader->ps_input_param_offset[input_index] = shader->nparam++;
- attr_number = lp_build_const_int32(gallivm,
- shader->ps_input_param_offset[input_index]);
-
- shader->ps_input_interpolate[input_index] = decl->Interp.Interpolate;
- interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
- decl->Interp.Location);
- if (interp_param_idx == -1)
- return;
- else if (interp_param_idx)
- interp_param = get_interp_param(si_shader_ctx, interp_param_idx);
+ attr_number = lp_build_const_int32(gallivm, input_index);
/* fs.constant returns the param from the middle vertex, so it's not
* really useful for flat shading. It's meant to be used for custom
@@ -936,24 +906,28 @@ static void declare_input_fs(
*/
intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
- if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
+ if (semantic_name == TGSI_SEMANTIC_COLOR &&
si_shader_ctx->shader->key.ps.color_two_side) {
LLVMValueRef args[4];
- LLVMValueRef face, is_face_positive;
- LLVMValueRef back_attr_number =
- lp_build_const_int32(gallivm,
- shader->ps_input_param_offset[input_index] + 1);
+ LLVMValueRef is_face_positive;
+ LLVMValueRef back_attr_number;
- face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);
+ /* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
+ * otherwise it's at offset "num_inputs".
+ */
+ unsigned back_attr_offset = num_interp_inputs;
+ if (semantic_index == 1 && colors_read_mask & 0xf)
+ back_attr_offset += 1;
+
+ back_attr_number = lp_build_const_int32(gallivm, back_attr_offset);
is_face_positive = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
face, uint->zero, "");
- args[2] = params;
+ args[2] = prim_mask;
args[3] = interp_param;
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
- unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
LLVMValueRef front, back;
args[0] = llvm_chan;
@@ -967,48 +941,71 @@ static void declare_input_fs(
input_type, args, args[3] ? 4 : 3,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
- radeon_bld->inputs[soa_index] =
- LLVMBuildSelect(gallivm->builder,
+ result[chan] = LLVMBuildSelect(gallivm->builder,
is_face_positive,
front,
back,
"");
}
-
- shader->nparam++;
- } else if (decl->Semantic.Name == TGSI_SEMANTIC_FOG) {
+ } else if (semantic_name == TGSI_SEMANTIC_FOG) {
LLVMValueRef args[4];
args[0] = uint->zero;
args[1] = attr_number;
- args[2] = params;
+ args[2] = prim_mask;
args[3] = interp_param;
- radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
- lp_build_intrinsic(gallivm->builder, intr_name,
+ result[0] = lp_build_intrinsic(gallivm->builder, intr_name,
input_type, args, args[3] ? 4 : 3,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
- radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
- radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
- lp_build_const_float(gallivm, 0.0f);
- radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
- lp_build_const_float(gallivm, 1.0f);
+ result[1] =
+ result[2] = lp_build_const_float(gallivm, 0.0f);
+ result[3] = lp_build_const_float(gallivm, 1.0f);
} else {
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
LLVMValueRef args[4];
LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
- unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
+
args[0] = llvm_chan;
args[1] = attr_number;
- args[2] = params;
+ args[2] = prim_mask;
args[3] = interp_param;
- radeon_bld->inputs[soa_index] =
- lp_build_intrinsic(gallivm->builder, intr_name,
+ result[chan] = lp_build_intrinsic(gallivm->builder, intr_name,
input_type, args, args[3] ? 4 : 3,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
}
}
}
+static void declare_input_fs(
+ struct radeon_llvm_context *radeon_bld,
+ unsigned input_index,
+ const struct tgsi_full_declaration *decl)
+{
+ struct si_shader_context *si_shader_ctx =
+ si_shader_context(&radeon_bld->soa.bld_base);
+ struct si_shader *shader = si_shader_ctx->shader;
+ LLVMValueRef main_fn = radeon_bld->main_fn;
+ LLVMValueRef interp_param = NULL;
+ int interp_param_idx;
+
+ interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
+ decl->Interp.Location);
+ if (interp_param_idx == -1)
+ return;
+ else if (interp_param_idx) {
+ interp_param_idx = select_interp_param(si_shader_ctx,
+ interp_param_idx);
+ interp_param = LLVMGetParam(main_fn, interp_param_idx);
+ }
+
+ interp_fs_input(si_shader_ctx, input_index, decl->Semantic.Name,
+ decl->Semantic.Index, shader->selector->info.num_inputs,
+ shader->selector->info.colors_read, interp_param,
+ LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK),
+ LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE),
+ &radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)]);
+}
+
static LLVMValueRef get_sample_id(struct radeon_llvm_context *radeon_bld)
{
return unpack_param(si_shader_context(&radeon_bld->soa.bld_base),
@@ -1060,7 +1057,6 @@ static void declare_system_value(
struct si_shader_context *si_shader_ctx =
si_shader_context(&radeon_bld->soa.bld_base);
struct lp_build_context *bld = &radeon_bld->soa.bld_base.base;
- struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
struct gallivm_state *gallivm = &radeon_bld->gallivm;
LLVMValueRef value = 0;
@@ -1136,12 +1132,10 @@ static void declare_system_value(
}
case TGSI_SEMANTIC_SAMPLEMASK:
- /* Smoothing isn't MSAA in GL, but it's MSAA in hardware.
- * Therefore, force gl_SampleMaskIn to 1 for GL. */
- if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
- value = uint_bld->one;
- else
- value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
+ /* This can only occur with the OpenGL Core profile, which
+ * doesn't support smoothing.
+ */
+ value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
break;
case TGSI_SEMANTIC_TESSCOORD:
@@ -1965,21 +1959,20 @@ handle_semantic:
}
}
-/* This only writes the tessellation factor levels. */
-static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
+static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef rel_patch_id,
+ LLVMValueRef invocation_id,
+ LLVMValueRef tcs_out_current_patch_data_offset)
{
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
struct si_shader *shader = si_shader_ctx->shader;
unsigned tess_inner_index, tess_outer_index;
- LLVMValueRef lds_base, lds_inner, lds_outer;
- LLVMValueRef tf_base, rel_patch_id, byteoffset, buffer, rw_buffers;
- LLVMValueRef out[6], vec0, vec1, invocation_id;
+ LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
+ LLVMValueRef out[6], vec0, vec1, rw_buffers, tf_base;
unsigned stride, outer_comps, inner_comps, i;
struct lp_build_if_state if_ctx;
- invocation_id = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
-
/* Do this only for invocation 0, because the tess levels are per-patch,
* not per-vertex.
*
@@ -2018,7 +2011,7 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
tess_inner_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSINNER, 0);
tess_outer_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSOUTER, 0);
- lds_base = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+ lds_base = tcs_out_current_patch_data_offset;
lds_inner = LLVMBuildAdd(gallivm->builder, lds_base,
lp_build_const_int32(gallivm,
tess_inner_index * 4), "");
@@ -2047,7 +2040,6 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
/* Get the offset. */
tf_base = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
SI_PARAM_TESS_FACTOR_OFFSET);
- rel_patch_id = get_rel_patch_id(si_shader_ctx);
byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
lp_build_const_int32(gallivm, 4 * stride), "");
@@ -2060,6 +2052,20 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
lp_build_endif(&if_ctx);
}
+/* This only writes the tessellation factor levels. */
+static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ LLVMValueRef invocation_id;
+
+ invocation_id = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
+
+ si_write_tess_factors(bld_base,
+ get_rel_patch_id(si_shader_ctx),
+ invocation_id,
+ get_tcs_out_current_patch_data_offset(si_shader_ctx));
+}
+
static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context * bld_base)
{
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
@@ -3253,17 +3259,17 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
LLVMValueRef interp_param;
const struct tgsi_full_instruction *inst = emit_data->inst;
const char *intr_name;
- int input_index;
+ int input_index = inst->Src[0].Register.Index;
int chan;
int i;
LLVMValueRef attr_number;
LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
LLVMValueRef params = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_PRIM_MASK);
int interp_param_idx;
+ unsigned interp = shader->selector->info.input_interpolate[input_index];
unsigned location;
assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
- input_index = inst->Src[0].Register.Index;
if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE)
@@ -3271,8 +3277,7 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
else
location = TGSI_INTERPOLATE_LOC_CENTROID;
- interp_param_idx = lookup_interp_param_index(shader->ps_input_interpolate[input_index],
- location);
+ interp_param_idx = lookup_interp_param_index(interp, location);
if (interp_param_idx == -1)
return;
else if (interp_param_idx)
@@ -3280,8 +3285,7 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
else
interp_param = NULL;
- attr_number = lp_build_const_int32(gallivm,
- shader->ps_input_param_offset[input_index]);
+ attr_number = lp_build_const_int32(gallivm, input_index);
if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
@@ -3632,7 +3636,6 @@ static void create_function(struct si_shader_context *si_shader_ctx)
case TGSI_PROCESSOR_FRAGMENT:
params[SI_PARAM_ALPHA_REF] = f32;
- params[SI_PARAM_PS_STATE_BITS] = i32;
params[SI_PARAM_PRIM_MASK] = i32;
last_sgpr = SI_PARAM_PRIM_MASK;
params[SI_PARAM_PERSP_SAMPLE] = v2i32;
@@ -3663,10 +3666,6 @@ static void create_function(struct si_shader_context *si_shader_ctx)
radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params);
radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type);
- if (shader->dx10_clamp_mode)
- LLVMAddTargetDependentFunctionAttr(si_shader_ctx->radeon_bld.main_fn,
- "enable-no-nans-fp-math", "true");
-
for (i = 0; i <= last_sgpr; ++i) {
LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i);
@@ -3884,7 +3883,7 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary,
conf->spi_ps_input_ena = value;
break;
case R_0286D0_SPI_PS_INPUT_ADDR:
- /* Not used yet, but will be in the future */
+ conf->spi_ps_input_addr = value;
break;
case R_0286E8_SPI_TMPRING_SIZE:
case R_00B860_COMPUTE_TMPRING_SIZE:
@@ -3904,6 +3903,9 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary,
}
break;
}
+
+ if (!conf->spi_ps_input_addr)
+ conf->spi_ps_input_addr = conf->spi_ps_input_ena;
}
}
@@ -4045,6 +4047,13 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
if (r600_can_dump_shader(&sscreen->b, processor)) {
+ if (processor == TGSI_PROCESSOR_FRAGMENT) {
+ fprintf(stderr, "*** SHADER CONFIG ***\n"
+ "SPI_PS_INPUT_ADDR = 0x%04x\n"
+ "SPI_PS_INPUT_ENA = 0x%04x\n",
+ conf->spi_ps_input_addr, conf->spi_ps_input_ena);
+ }
+
fprintf(stderr, "*** SHADER STATS ***\n"
"SGPRS: %d\n"
"VGPRS: %d\n"
@@ -4084,7 +4093,8 @@ int si_compile_llvm(struct si_screen *sscreen,
LLVMTargetMachineRef tm,
LLVMModuleRef mod,
struct pipe_debug_callback *debug,
- unsigned processor)
+ unsigned processor,
+ const char *name)
{
int r = 0;
unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
@@ -4092,8 +4102,11 @@ int si_compile_llvm(struct si_screen *sscreen,
if (r600_can_dump_shader(&sscreen->b, processor)) {
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
- if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR)))
+ if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) {
+ fprintf(stderr, "%s LLVM IR:\n\n", name);
LLVMDumpModule(mod);
+ fprintf(stderr, "\n");
+ }
}
if (!si_replace_shader(count, binary)) {
@@ -4106,6 +4119,20 @@ int si_compile_llvm(struct si_screen *sscreen,
si_shader_binary_read_config(binary, conf, 0);
+ /* Enable 64-bit and 16-bit denormals, because there is no performance
+ * cost.
+ *
+ * If denormals are enabled, all floating-point output modifiers are
+ * ignored.
+ *
+ * Don't enable denormals for 32-bit floats, because:
+ * - Floating-point output modifiers would be ignored by the hw.
+ * - Some opcodes don't support denormals, such as v_mad_f32. We would
+ * have to stop using those.
+ * - SI & CI would be very slow.
+ */
+ conf->float_mode |= V_00B028_FP_64_DENORMS;
+
FREE(binary->config);
FREE(binary->global_symbol_offsets);
binary->config = NULL;
@@ -4116,7 +4143,7 @@ int si_compile_llvm(struct si_screen *sscreen,
/* Generate code for the hardware VS shader stage to go with a geometry shader */
static int si_generate_gs_copy_shader(struct si_screen *sscreen,
struct si_shader_context *si_shader_ctx,
- struct si_shader *gs, bool dump,
+ struct si_shader *gs,
struct pipe_debug_callback *debug)
{
struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
@@ -4186,14 +4213,14 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
radeon_llvm_finalize_module(&si_shader_ctx->radeon_bld);
- if (dump)
- fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n");
-
r = si_compile_llvm(sscreen, &si_shader_ctx->shader->binary,
&si_shader_ctx->shader->config, si_shader_ctx->tm,
bld_base->base.gallivm->module,
- debug, TGSI_PROCESSOR_GEOMETRY);
+ debug, TGSI_PROCESSOR_GEOMETRY,
+ "GS Copy Shader");
if (!r) {
+ if (r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY))
+ fprintf(stderr, "GS Copy Shader:\n");
si_shader_dump(sscreen, si_shader_ctx->shader, debug,
TGSI_PROCESSOR_GEOMETRY);
r = si_shader_binary_upload(sscreen, si_shader_ctx->shader);
@@ -4250,47 +4277,26 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
}
}
-int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
- struct si_shader *shader,
- struct pipe_debug_callback *debug)
+static void si_init_shader_ctx(struct si_shader_context *ctx,
+ struct si_screen *sscreen,
+ struct si_shader *shader,
+ LLVMTargetMachineRef tm,
+ struct tgsi_shader_info *info)
{
- struct si_shader_selector *sel = shader->selector;
- struct tgsi_token *tokens = sel->tokens;
- struct si_shader_context si_shader_ctx;
- struct lp_build_tgsi_context * bld_base;
- struct tgsi_shader_info stipple_shader_info;
- LLVMModuleRef mod;
- int r = 0;
- bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
- shader->key.ps.poly_stipple;
- bool dump = r600_can_dump_shader(&sscreen->b, sel->info.processor);
-
- if (poly_stipple) {
- tokens = util_pstipple_create_fragment_shader(tokens, NULL,
- SI_POLY_STIPPLE_SAMPLER,
- TGSI_FILE_SYSTEM_VALUE);
- tgsi_scan_shader(tokens, &stipple_shader_info);
- }
-
- /* Dump TGSI code before doing TGSI->LLVM conversion in case the
- * conversion fails. */
- if (dump && !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
- si_dump_shader_key(sel->type, &shader->key, stderr);
- tgsi_dump(tokens, 0);
- si_dump_streamout(&sel->so);
- }
-
- assert(shader->nparam == 0);
-
- memset(&si_shader_ctx, 0, sizeof(si_shader_ctx));
- radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
- bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
-
- if (sel->type != PIPE_SHADER_COMPUTE)
- shader->dx10_clamp_mode = true;
+ struct lp_build_tgsi_context *bld_base;
+
+ memset(ctx, 0, sizeof(*ctx));
+ radeon_llvm_context_init(&ctx->radeon_bld);
+ ctx->tm = tm;
+ ctx->screen = sscreen;
+ if (shader && shader->selector)
+ ctx->type = shader->selector->info.processor;
+ else
+ ctx->type = -1;
+ ctx->shader = shader;
- shader->uses_instanceid = sel->info.uses_instanceid;
- bld_base->info = poly_stipple ? &stipple_shader_info : &sel->info;
+ bld_base = &ctx->radeon_bld.soa.bld_base;
+ bld_base->info = info;
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
@@ -4326,12 +4332,45 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
}
+}
+
+int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ struct pipe_debug_callback *debug)
+{
+ struct si_shader_selector *sel = shader->selector;
+ struct tgsi_token *tokens = sel->tokens;
+ struct si_shader_context si_shader_ctx;
+ struct lp_build_tgsi_context * bld_base;
+ struct tgsi_shader_info stipple_shader_info;
+ LLVMModuleRef mod;
+ int r = 0;
+ bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
+ shader->key.ps.poly_stipple;
+
+ if (poly_stipple) {
+ tokens = util_pstipple_create_fragment_shader(tokens, NULL,
+ SI_POLY_STIPPLE_SAMPLER,
+ TGSI_FILE_SYSTEM_VALUE);
+ tgsi_scan_shader(tokens, &stipple_shader_info);
+ }
+ /* Dump TGSI code before doing TGSI->LLVM conversion in case the
+ * conversion fails. */
+ if (r600_can_dump_shader(&sscreen->b, sel->info.processor) &&
+ !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
+ si_dump_shader_key(sel->type, &shader->key, stderr);
+ tgsi_dump(tokens, 0);
+ si_dump_streamout(&sel->so);
+ }
+
+ si_init_shader_ctx(&si_shader_ctx, sscreen, shader, tm,
+ poly_stipple ? &stipple_shader_info : &sel->info);
+
+ shader->uses_instanceid = sel->info.uses_instanceid;
+
+ bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
si_shader_ctx.radeon_bld.load_system_value = declare_system_value;
- si_shader_ctx.shader = shader;
- si_shader_ctx.type = tgsi_get_processor_type(tokens);
- si_shader_ctx.screen = sscreen;
- si_shader_ctx.tm = tm;
switch (si_shader_ctx.type) {
case TGSI_PROCESSOR_VERTEX:
@@ -4401,7 +4440,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
- mod, debug, si_shader_ctx.type);
+ mod, debug, si_shader_ctx.type, "TGSI shader");
if (r) {
fprintf(stderr, "LLVM failed to compile shader\n");
goto out;
@@ -4422,7 +4461,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
shader->gs_copy_shader->selector = shader->selector;
si_shader_ctx.shader = shader->gs_copy_shader;
if ((r = si_generate_gs_copy_shader(sscreen, &si_shader_ctx,
- shader, dump, debug))) {
+ shader, debug))) {
free(shader->gs_copy_shader);
shader->gs_copy_shader = NULL;
goto out;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 98bdb890a45..c42c51e0455 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -88,7 +88,6 @@ struct radeon_shader_reloc;
#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */
#define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */
#define SI_SGPR_ALPHA_REF 8 /* PS only */
-#define SI_SGPR_PS_STATE_BITS 9 /* PS only */
#define SI_VS_NUM_USER_SGPR 13 /* API VS */
#define SI_ES_NUM_USER_SGPR 12 /* API VS */
@@ -97,7 +96,7 @@ struct radeon_shader_reloc;
#define SI_TES_NUM_USER_SGPR 10
#define SI_GS_NUM_USER_SGPR 8
#define SI_GSCOPY_NUM_USER_SGPR 4
-#define SI_PS_NUM_USER_SGPR 10
+#define SI_PS_NUM_USER_SGPR 9
/* LLVM function parameter indices */
#define SI_PARAM_RW_BUFFERS 0
@@ -152,27 +151,23 @@ struct radeon_shader_reloc;
/* PS only parameters */
#define SI_PARAM_ALPHA_REF 4
-/* Bits:
- * 0: force_persample_interp
- */
-#define SI_PARAM_PS_STATE_BITS 5
-#define SI_PARAM_PRIM_MASK 6
-#define SI_PARAM_PERSP_SAMPLE 7
-#define SI_PARAM_PERSP_CENTER 8
-#define SI_PARAM_PERSP_CENTROID 9
-#define SI_PARAM_PERSP_PULL_MODEL 10
-#define SI_PARAM_LINEAR_SAMPLE 11
-#define SI_PARAM_LINEAR_CENTER 12
-#define SI_PARAM_LINEAR_CENTROID 13
-#define SI_PARAM_LINE_STIPPLE_TEX 14
-#define SI_PARAM_POS_X_FLOAT 15
-#define SI_PARAM_POS_Y_FLOAT 16
-#define SI_PARAM_POS_Z_FLOAT 17
-#define SI_PARAM_POS_W_FLOAT 18
-#define SI_PARAM_FRONT_FACE 19
-#define SI_PARAM_ANCILLARY 20
-#define SI_PARAM_SAMPLE_COVERAGE 21
-#define SI_PARAM_POS_FIXED_PT 22
+#define SI_PARAM_PRIM_MASK 5
+#define SI_PARAM_PERSP_SAMPLE 6
+#define SI_PARAM_PERSP_CENTER 7
+#define SI_PARAM_PERSP_CENTROID 8
+#define SI_PARAM_PERSP_PULL_MODEL 9
+#define SI_PARAM_LINEAR_SAMPLE 10
+#define SI_PARAM_LINEAR_CENTER 11
+#define SI_PARAM_LINEAR_CENTROID 12
+#define SI_PARAM_LINE_STIPPLE_TEX 13
+#define SI_PARAM_POS_X_FLOAT 14
+#define SI_PARAM_POS_Y_FLOAT 15
+#define SI_PARAM_POS_Z_FLOAT 16
+#define SI_PARAM_POS_W_FLOAT 17
+#define SI_PARAM_FRONT_FACE 18
+#define SI_PARAM_ANCILLARY 19
+#define SI_PARAM_SAMPLE_COVERAGE 20
+#define SI_PARAM_POS_FIXED_PT 21
#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
@@ -193,14 +188,6 @@ struct si_shader_selector {
/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
unsigned type;
- /* Whether the shader has to use a conditional assignment to
- * choose between weights when emulating
- * pipe_rasterizer_state::force_persample_interp.
- * If false, "si_emit_spi_ps_input" will take care of it instead.
- */
- bool forces_persample_interp_for_persp;
- bool forces_persample_interp_for_linear;
-
/* GS parameters. */
unsigned esgs_itemsize;
unsigned gs_input_verts_per_prim;
@@ -245,6 +232,7 @@ union si_shader_key {
unsigned poly_stipple:1;
unsigned poly_line_smoothing:1;
unsigned clamp_color:1;
+ unsigned force_persample_interp:1;
} ps;
struct {
unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS];
@@ -272,6 +260,7 @@ struct si_shader_config {
unsigned num_vgprs;
unsigned lds_size;
unsigned spi_ps_input_ena;
+ unsigned spi_ps_input_addr;
unsigned float_mode;
unsigned scratch_bytes_per_wave;
unsigned rsrc1;
@@ -290,14 +279,10 @@ struct si_shader {
struct radeon_shader_binary binary;
struct si_shader_config config;
- unsigned nparam;
unsigned vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS];
- unsigned ps_input_param_offset[PIPE_MAX_SHADER_INPUTS];
- unsigned ps_input_interpolate[PIPE_MAX_SHADER_INPUTS];
bool uses_instanceid;
unsigned nr_pos_exports;
unsigned nr_param_exports;
- bool dx10_clamp_mode; /* convert NaNs to 0 */
};
static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
@@ -343,7 +328,8 @@ int si_compile_llvm(struct si_screen *sscreen,
LLVMTargetMachineRef tm,
LLVMModuleRef mod,
struct pipe_debug_callback *debug,
- unsigned processor);
+ unsigned processor,
+ const char *name);
void si_shader_destroy(struct si_shader *shader);
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 507f45938ce..e9a017534d1 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -133,7 +133,6 @@ union si_state_atoms {
struct r600_atom *viewports;
struct r600_atom *stencil_ref;
struct r600_atom *spi_map;
- struct r600_atom *spi_ps_input;
} s;
struct r600_atom *array[0];
};
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index bbef429edc5..77a4e47c809 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -124,7 +124,8 @@ static void si_shader_ls(struct si_shader *shader)
shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B528_SGPRS((num_sgprs - 1) / 8) |
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
- S_00B528_DX10_CLAMP(shader->dx10_clamp_mode);
+ S_00B528_DX10_CLAMP(1) |
+ S_00B528_FLOAT_MODE(shader->config.float_mode);
shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
}
@@ -157,7 +158,8 @@ static void si_shader_hs(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B428_SGPRS((num_sgprs - 1) / 8) |
- S_00B428_DX10_CLAMP(shader->dx10_clamp_mode));
+ S_00B428_DX10_CLAMP(1) |
+ S_00B428_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
S_00B42C_USER_SGPR(num_user_sgprs) |
S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
@@ -203,7 +205,8 @@ static void si_shader_es(struct si_shader *shader)
S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B328_SGPRS((num_sgprs - 1) / 8) |
S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
- S_00B328_DX10_CLAMP(shader->dx10_clamp_mode));
+ S_00B328_DX10_CLAMP(1) |
+ S_00B328_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
S_00B32C_USER_SGPR(num_user_sgprs) |
S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
@@ -292,7 +295,8 @@ static void si_shader_gs(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B228_SGPRS((num_sgprs - 1) / 8) |
- S_00B228_DX10_CLAMP(shader->dx10_clamp_mode));
+ S_00B228_DX10_CLAMP(1) |
+ S_00B228_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
S_00B22C_USER_SGPR(num_user_sgprs) |
S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
@@ -381,7 +385,8 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B128_SGPRS((num_sgprs - 1) / 8) |
S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
- S_00B128_DX10_CLAMP(shader->dx10_clamp_mode));
+ S_00B128_DX10_CLAMP(1) |
+ S_00B128_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
S_00B12C_USER_SGPR(num_user_sgprs) |
S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
@@ -404,6 +409,18 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
si_set_tesseval_regs(shader, pm4);
}
+static unsigned si_get_ps_num_interp(struct si_shader *ps)
+{
+ struct tgsi_shader_info *info = &ps->selector->info;
+ unsigned num_colors = !!(info->colors_read & 0x0f) +
+ !!(info->colors_read & 0xf0);
+ unsigned num_interp = ps->selector->info.num_inputs +
+ (ps->key.ps.color_two_side ? num_colors : 0);
+
+ assert(num_interp <= 32);
+ return MIN2(num_interp, 32);
+}
+
static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
{
unsigned value = shader->key.ps.spi_shader_col_format;
@@ -460,6 +477,17 @@ static void si_shader_ps(struct si_shader *shader)
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
uint64_t va;
bool has_centroid;
+ unsigned input_ena = shader->config.spi_ps_input_ena;
+
+ /* we need to enable at least one of them, otherwise we hang the GPU */
+ assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
+ G_0286CC_PERSP_CENTER_ENA(input_ena) ||
+ G_0286CC_PERSP_CENTROID_ENA(input_ena) ||
+ G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) ||
+ G_0286CC_LINEAR_SAMPLE_ENA(input_ena) ||
+ G_0286CC_LINEAR_CENTER_ENA(input_ena) ||
+ G_0286CC_LINEAR_CENTROID_ENA(input_ena) ||
+ G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena));
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
@@ -503,11 +531,15 @@ static void si_shader_ps(struct si_shader *shader)
shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS))
spi_shader_col_format = V_028714_SPI_SHADER_32_R;
+ si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena);
+ si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR,
+ shader->config.spi_ps_input_addr);
+
/* Set interpolation controls. */
has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) ||
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena);
- spi_ps_in_control = S_0286D8_NUM_INTERP(shader->nparam) |
+ spi_ps_in_control = S_0286D8_NUM_INTERP(si_get_ps_num_interp(shader)) |
S_0286D8_BC_OPTIMIZE_DISABLE(has_centroid);
/* Set registers. */
@@ -540,7 +572,8 @@ static void si_shader_ps(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B028_SGPRS((num_sgprs - 1) / 8) |
- S_00B028_DX10_CLAMP(shader->dx10_clamp_mode));
+ S_00B028_DX10_CLAMP(1) |
+ S_00B028_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
S_00B02C_USER_SGPR(num_user_sgprs) |
@@ -681,7 +714,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY;
bool is_line = !is_poly && sctx->current_rast_prim != PIPE_PRIM_POINTS;
- key->ps.color_two_side = rs->two_side;
+ key->ps.color_two_side = rs->two_side && sel->info.colors_read;
if (sctx->queued.named.blend) {
key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
@@ -694,6 +727,15 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
(is_line && rs->line_smooth)) &&
sctx->framebuffer.nr_samples <= 1;
key->ps.clamp_color = rs->clamp_fragment_color;
+
+ key->ps.force_persample_interp = rs->force_persample_interp &&
+ rs->multisample_enable &&
+ sctx->framebuffer.nr_samples > 1 &&
+ sctx->ps_iter_samples > 1 &&
+ (sel->info.uses_persp_center ||
+ sel->info.uses_persp_centroid ||
+ sel->info.uses_linear_center ||
+ sel->info.uses_linear_centroid);
}
key->ps.alpha_func = si_get_alpha_test_func(sctx);
@@ -796,7 +838,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
sel->type = util_pipe_shader_from_tgsi_processor(sel->info.processor);
p_atomic_inc(&sscreen->b.num_shaders_created);
- /* First set which opcode uses which (i,j) pair. */
+ /* Set which opcode uses which (i,j) pair. */
if (sel->info.uses_persp_opcode_interp_centroid)
sel->info.uses_persp_centroid = true;
@@ -811,19 +853,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
sel->info.uses_linear_opcode_interp_sample)
sel->info.uses_linear_center = true;
- /* Determine if the shader has to use a conditional assignment when
- * emulating force_persample_interp.
- */
- sel->forces_persample_interp_for_persp =
- sel->info.uses_persp_center +
- sel->info.uses_persp_centroid +
- sel->info.uses_persp_sample >= 2;
-
- sel->forces_persample_interp_for_linear =
- sel->info.uses_linear_center +
- sel->info.uses_linear_centroid +
- sel->info.uses_linear_sample >= 2;
-
switch (sel->type) {
case PIPE_SHADER_GEOMETRY:
sel->gs_output_prim =
@@ -893,7 +922,8 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
}
/* Pre-compilation. */
- if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
+ if (sel->type == PIPE_SHADER_GEOMETRY ||
+ sscreen->b.debug_flags & DBG_PRECOMPILE) {
struct si_shader_ctx_state state = {sel};
union si_shader_key key;
@@ -1030,6 +1060,41 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
}
+static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
+{
+ if (shader->pm4) {
+ switch (shader->selector->type) {
+ case PIPE_SHADER_VERTEX:
+ if (shader->key.vs.as_ls)
+ si_pm4_delete_state(sctx, ls, shader->pm4);
+ else if (shader->key.vs.as_es)
+ si_pm4_delete_state(sctx, es, shader->pm4);
+ else
+ si_pm4_delete_state(sctx, vs, shader->pm4);
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ si_pm4_delete_state(sctx, hs, shader->pm4);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ if (shader->key.tes.as_es)
+ si_pm4_delete_state(sctx, es, shader->pm4);
+ else
+ si_pm4_delete_state(sctx, vs, shader->pm4);
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ si_pm4_delete_state(sctx, gs, shader->pm4);
+ si_pm4_delete_state(sctx, vs, shader->gs_copy_shader->pm4);
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ si_pm4_delete_state(sctx, ps, shader->pm4);
+ break;
+ }
+ }
+
+ si_shader_destroy(shader);
+ free(shader);
+}
+
static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
@@ -1050,35 +1115,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
while (p) {
c = p->next_variant;
- switch (sel->type) {
- case PIPE_SHADER_VERTEX:
- if (p->key.vs.as_ls)
- si_pm4_delete_state(sctx, ls, p->pm4);
- else if (p->key.vs.as_es)
- si_pm4_delete_state(sctx, es, p->pm4);
- else
- si_pm4_delete_state(sctx, vs, p->pm4);
- break;
- case PIPE_SHADER_TESS_CTRL:
- si_pm4_delete_state(sctx, hs, p->pm4);
- break;
- case PIPE_SHADER_TESS_EVAL:
- if (p->key.tes.as_es)
- si_pm4_delete_state(sctx, es, p->pm4);
- else
- si_pm4_delete_state(sctx, vs, p->pm4);
- break;
- case PIPE_SHADER_GEOMETRY:
- si_pm4_delete_state(sctx, gs, p->pm4);
- si_pm4_delete_state(sctx, vs, p->gs_copy_shader->pm4);
- break;
- case PIPE_SHADER_FRAGMENT:
- si_pm4_delete_state(sctx, ps, p->pm4);
- break;
- }
-
- si_shader_destroy(p);
- free(p);
+ si_delete_shader(sctx, p);
p = c;
}
@@ -1087,132 +1124,86 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
free(sel);
}
+static unsigned si_get_ps_input_cntl(struct si_context *sctx,
+ struct si_shader *vs, unsigned name,
+ unsigned index, unsigned interpolate)
+{
+ struct tgsi_shader_info *vsinfo = &vs->selector->info;
+ unsigned j, ps_input_cntl = 0;
+
+ if (interpolate == TGSI_INTERPOLATE_CONSTANT ||
+ (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
+ ps_input_cntl |= S_028644_FLAT_SHADE(1);
+
+ if (name == TGSI_SEMANTIC_PCOORD ||
+ (name == TGSI_SEMANTIC_TEXCOORD &&
+ sctx->sprite_coord_enable & (1 << index))) {
+ ps_input_cntl |= S_028644_PT_SPRITE_TEX(1);
+ }
+
+ for (j = 0; j < vsinfo->num_outputs; j++) {
+ if (name == vsinfo->output_semantic_name[j] &&
+ index == vsinfo->output_semantic_index[j]) {
+ ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[j]);
+ break;
+ }
+ }
+
+ if (name == TGSI_SEMANTIC_PRIMID)
+ /* PrimID is written after the last output. */
+ ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]);
+ else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
+ /* No corresponding output found, load defaults into input.
+ * Don't set any other bits.
+ * (FLAT_SHADE=1 completely changes behavior) */
+ ps_input_cntl = S_028644_OFFSET(0x20);
+ }
+ return ps_input_cntl;
+}
+
static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct si_shader *ps = sctx->ps_shader.current;
struct si_shader *vs = si_get_vs_state(sctx);
- struct tgsi_shader_info *psinfo;
- struct tgsi_shader_info *vsinfo = &vs->selector->info;
- unsigned i, j, tmp, num_written = 0;
+ struct tgsi_shader_info *psinfo = ps ? &ps->selector->info : NULL;
+ unsigned i, num_interp, num_written = 0, bcol_interp[2];
- if (!ps || !ps->nparam)
+ if (!ps || !ps->selector->info.num_inputs)
return;
- psinfo = &ps->selector->info;
-
- radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps->nparam);
+ num_interp = si_get_ps_num_interp(ps);
+ assert(num_interp > 0);
+ radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, num_interp);
for (i = 0; i < psinfo->num_inputs; i++) {
unsigned name = psinfo->input_semantic_name[i];
unsigned index = psinfo->input_semantic_index[i];
unsigned interpolate = psinfo->input_interpolate[i];
- unsigned param_offset = ps->ps_input_param_offset[i];
-bcolor:
- tmp = 0;
-
- if (interpolate == TGSI_INTERPOLATE_CONSTANT ||
- (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
- tmp |= S_028644_FLAT_SHADE(1);
-
- if (name == TGSI_SEMANTIC_PCOORD ||
- (name == TGSI_SEMANTIC_TEXCOORD &&
- sctx->sprite_coord_enable & (1 << index))) {
- tmp |= S_028644_PT_SPRITE_TEX(1);
- }
- for (j = 0; j < vsinfo->num_outputs; j++) {
- if (name == vsinfo->output_semantic_name[j] &&
- index == vsinfo->output_semantic_index[j]) {
- tmp |= S_028644_OFFSET(vs->vs_output_param_offset[j]);
- break;
- }
- }
-
- if (name == TGSI_SEMANTIC_PRIMID)
- /* PrimID is written after the last output. */
- tmp |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]);
- else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(tmp)) {
- /* No corresponding output found, load defaults into input.
- * Don't set any other bits.
- * (FLAT_SHADE=1 completely changes behavior) */
- tmp = S_028644_OFFSET(0x20);
- }
-
- assert(param_offset == num_written);
- radeon_emit(cs, tmp);
+ radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, name, index,
+ interpolate));
num_written++;
- if (name == TGSI_SEMANTIC_COLOR &&
- ps->key.ps.color_two_side) {
- name = TGSI_SEMANTIC_BCOLOR;
- param_offset++;
- goto bcolor;
+ if (name == TGSI_SEMANTIC_COLOR) {
+ assert(index < ARRAY_SIZE(bcol_interp));
+ bcol_interp[index] = interpolate;
}
}
- assert(ps->nparam == num_written);
-}
-static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom)
-{
- struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
- struct si_shader *ps = sctx->ps_shader.current;
- unsigned input_ena;
-
- if (!ps)
- return;
+ if (ps->key.ps.color_two_side) {
+ unsigned bcol = TGSI_SEMANTIC_BCOLOR;
- input_ena = ps->config.spi_ps_input_ena;
+ for (i = 0; i < 2; i++) {
+ if (!(psinfo->colors_read & (0xf << (i * 4))))
+ continue;
- /* we need to enable at least one of them, otherwise we hang the GPU */
- assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
- G_0286CC_PERSP_CENTER_ENA(input_ena) ||
- G_0286CC_PERSP_CENTROID_ENA(input_ena) ||
- G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) ||
- G_0286CC_LINEAR_SAMPLE_ENA(input_ena) ||
- G_0286CC_LINEAR_CENTER_ENA(input_ena) ||
- G_0286CC_LINEAR_CENTROID_ENA(input_ena) ||
- G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena));
-
- if (sctx->force_persample_interp) {
- unsigned num_persp = G_0286CC_PERSP_SAMPLE_ENA(input_ena) +
- G_0286CC_PERSP_CENTER_ENA(input_ena) +
- G_0286CC_PERSP_CENTROID_ENA(input_ena);
- unsigned num_linear = G_0286CC_LINEAR_SAMPLE_ENA(input_ena) +
- G_0286CC_LINEAR_CENTER_ENA(input_ena) +
- G_0286CC_LINEAR_CENTROID_ENA(input_ena);
-
- /* If only one set of (i,j) coordinates is used, we can disable
- * CENTER/CENTROID, enable SAMPLE and it will load SAMPLE coordinates
- * where CENTER/CENTROID are expected, effectively forcing per-sample
- * interpolation.
- */
- if (num_persp == 1) {
- input_ena &= C_0286CC_PERSP_CENTER_ENA;
- input_ena &= C_0286CC_PERSP_CENTROID_ENA;
- input_ena |= G_0286CC_PERSP_SAMPLE_ENA(1);
+ radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, bcol,
+ i, bcol_interp[i]));
+ num_written++;
}
- if (num_linear == 1) {
- input_ena &= C_0286CC_LINEAR_CENTER_ENA;
- input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
- input_ena |= G_0286CC_LINEAR_SAMPLE_ENA(1);
- }
-
- /* If at least 2 sets of coordinates are used, we can't use this
- * trick and have to select SAMPLE using a conditional assignment
- * in the shader with "force_persample_interp" being a shader constant.
- */
}
-
- radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2);
- radeon_emit(cs, input_ena);
- radeon_emit(cs, input_ena);
-
- if (ps->selector->forces_persample_interp_for_persp ||
- ps->selector->forces_persample_interp_for_linear)
- radeon_set_sh_reg(cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
- SI_SGPR_PS_STATE_BITS * 4,
- sctx->force_persample_interp);
+ assert(num_interp == num_written);
}
/**
@@ -1746,12 +1737,6 @@ bool si_update_shaders(struct si_context *sctx)
si_mark_atom_dirty(sctx, &sctx->spi_map);
}
- if (si_pm4_state_changed(sctx, ps) ||
- sctx->force_persample_interp != rs->force_persample_interp) {
- sctx->force_persample_interp = rs->force_persample_interp;
- si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
- }
-
if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps))
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
@@ -1784,7 +1769,6 @@ bool si_update_shaders(struct si_context *sctx)
void si_init_shader_functions(struct si_context *sctx)
{
si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
- si_init_atom(sctx, &sctx->spi_ps_input, &sctx->atoms.s.spi_ps_input, si_emit_spi_ps_input);
sctx->b.b.create_vs_state = si_create_shader_selector;
sctx->b.b.create_tcs_state = si_create_shader_selector;
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 9e1e158219f..892084707d2 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -2845,6 +2845,9 @@
#define S_00B028_FLOAT_MODE(x) (((x) & 0xFF) << 12)
#define G_00B028_FLOAT_MODE(x) (((x) >> 12) & 0xFF)
#define C_00B028_FLOAT_MODE 0xFFF00FFF
+#define V_00B028_FP_32_DENORMS 0x30
+#define V_00B028_FP_64_DENORMS 0xc0
+#define V_00B028_FP_ALL_DENORMS 0xf0
#define S_00B028_PRIV(x) (((x) & 0x1) << 20)
#define G_00B028_PRIV(x) (((x) >> 20) & 0x1)
#define C_00B028_PRIV 0xFFEFFFFF
diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c
index 188347bb4ca..5a29e26517d 100644
--- a/src/gallium/drivers/softpipe/sp_flush.c
+++ b/src/gallium/drivers/softpipe/sp_flush.c
@@ -38,6 +38,7 @@
#include "sp_state.h"
#include "sp_tile_cache.h"
#include "sp_tex_tile_cache.h"
+#include "util/u_debug_image.h"
#include "util/u_memory.h"
#include "util/u_string.h"
diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c
index d593c781680..8e0af12d294 100644
--- a/src/gallium/drivers/svga/svga_pipe_flush.c
+++ b/src/gallium/drivers/svga/svga_pipe_flush.c
@@ -24,6 +24,7 @@
**********************************************************/
#include "pipe/p_defines.h"
+#include "util/u_debug_image.h"
#include "util/u_string.h"
#include "svga_screen.h"
#include "svga_surface.h"
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 4d03fe1ee0b..2ce2b3aef75 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -120,18 +120,13 @@ trace_context_draw_vbo(struct pipe_context *_pipe,
trace_dump_trace_flush();
if (info->indirect) {
- struct pipe_draw_info *_info = NULL;
+ struct pipe_draw_info _info;
- _info = MALLOC(sizeof(*_info));
- if (!_info)
- return;
-
- memcpy(_info, info, sizeof(*_info));
- _info->indirect = trace_resource_unwrap(tr_ctx, _info->indirect);
- _info->indirect_params = trace_resource_unwrap(tr_ctx,
- _info->indirect_params);
- pipe->draw_vbo(pipe, _info);
- FREE(_info);
+ memcpy(&_info, info, sizeof(_info));
+ _info.indirect = trace_resource_unwrap(tr_ctx, _info.indirect);
+ _info.indirect_params = trace_resource_unwrap(tr_ctx,
+ _info.indirect_params);
+ pipe->draw_vbo(pipe, &_info);
} else {
pipe->draw_vbo(pipe, info);
}
@@ -1285,6 +1280,33 @@ trace_context_clear_depth_stencil(struct pipe_context *_pipe,
}
static inline void
+trace_context_clear_texture(struct pipe_context *_pipe,
+ struct pipe_resource *res,
+ unsigned level,
+ const struct pipe_box *box,
+ const void *data)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ res = trace_resource_unwrap(tr_ctx, res);
+
+ trace_dump_call_begin("pipe_context", "clear_texture");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, res);
+ trace_dump_arg(uint, level);
+ trace_dump_arg_begin("box");
+ trace_dump_box(box);
+ trace_dump_arg_end();
+ trace_dump_arg(ptr, data);
+
+ pipe->clear_texture(pipe, res, level, box, data);
+
+ trace_dump_call_end();
+}
+
+static inline void
trace_context_flush(struct pipe_context *_pipe,
struct pipe_fence_handle **fence,
unsigned flags)
@@ -1709,6 +1731,7 @@ trace_context_create(struct trace_screen *tr_scr,
TR_CTX_INIT(clear);
TR_CTX_INIT(clear_render_target);
TR_CTX_INIT(clear_depth_stencil);
+ TR_CTX_INIT(clear_texture);
TR_CTX_INIT(flush);
TR_CTX_INIT(generate_mipmap);
TR_CTX_INIT(texture_barrier);
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 800f16cd250..b01f6ea3dcb 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -349,6 +349,12 @@ enum pipe_flush_flags
#define PIPE_CONTEXT_DEBUG (1 << 1)
/**
+ * Whether out-of-bounds shader loads must return zero and out-of-bounds
+ * shader stores must be dropped.
+ */
+#define PIPE_CONTEXT_ROBUST_BUFFER_ACCESS (1 << 2)
+
+/**
* Flags for pipe_context::memory_barrier.
*/
#define PIPE_BARRIER_MAPPED_BUFFER (1 << 0)
diff --git a/src/gallium/targets/graw-null/graw_util.c b/src/gallium/targets/graw-null/graw_util.c
index 07693e85f6a..03b45d99e9d 100644
--- a/src/gallium/targets/graw-null/graw_util.c
+++ b/src/gallium/targets/graw-null/graw_util.c
@@ -5,6 +5,7 @@
#include "pipe/p_state.h"
#include "tgsi/tgsi_text.h"
#include "util/u_debug.h"
+#include "util/u_debug_image.h"
#include "util/u_memory.h"
#include "state_tracker/graw.h"
diff --git a/src/gallium/tests/graw/graw_util.h b/src/gallium/tests/graw/graw_util.h
index f09c1eadc9c..3c7dbd061cc 100644
--- a/src/gallium/tests/graw/graw_util.h
+++ b/src/gallium/tests/graw/graw_util.h
@@ -9,6 +9,7 @@
#include "util/u_box.h"
#include "util/u_debug.h"
+#include "util/u_debug_image.h"
#include "util/u_draw_quad.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c
index 4c5a9200a52..ddee2942af9 100644
--- a/src/gallium/tests/trivial/quad-tex.c
+++ b/src/gallium/tests/trivial/quad-tex.c
@@ -50,7 +50,7 @@
/* u_sampler_view_default_template */
#include "util/u_sampler.h"
/* debug_dump_surface_bmp */
-#include "util/u_debug.h"
+#include "util/u_debug_image.h"
/* util_draw_vertex_buffer helper */
#include "util/u_draw_quad.h"
/* FREE & CALLOC_STRUCT */
diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c
index c71a63f44e5..914f5e75fa9 100644
--- a/src/gallium/tests/trivial/tri.c
+++ b/src/gallium/tests/trivial/tri.c
@@ -48,7 +48,7 @@
#include "cso_cache/cso_context.h"
/* debug_dump_surface_bmp */
-#include "util/u_debug.h"
+#include "util/u_debug_image.h"
/* util_draw_vertex_buffer helper */
#include "util/u_draw_quad.h"
/* FREE & CALLOC_STRUCT */
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 35dc7e69dcf..49c310cfdf7 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -405,6 +405,12 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL,
&ws->info.num_tile_pipes);
+ /* The kernel returns 12 for some cards for an unknown reason.
+ * I thought this was supposed to be a power of two.
+ */
+ if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12)
+ ws->info.num_tile_pipes = 8;
+
if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL,
&ws->info.r600_gb_backend_map))
ws->info.r600_gb_backend_map_valid = TRUE;