diff options
Diffstat (limited to 'src/gallium')
42 files changed, 1486 insertions, 1569 deletions
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 6f50f714c3f..84da85c5b96 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -191,11 +191,13 @@ C_SOURCES := \ util/u_cpu_detect.c \ util/u_cpu_detect.h \ util/u_debug.c \ + util/u_debug.h \ util/u_debug_describe.c \ util/u_debug_describe.h \ util/u_debug_flush.c \ util/u_debug_flush.h \ - util/u_debug.h \ + util/u_debug_image.c \ + util/u_debug_image.h \ util/u_debug_memory.c \ util/u_debug_refcnt.c \ util/u_debug_refcnt.h \ diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index cf52ca48b26..0298334a28f 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -43,10 +43,10 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_pstipple.h" #include "util/u_sampler.h" #include "tgsi/tgsi_transform.h" -#include "tgsi/tgsi_dump.h" #include "draw_context.h" #include "draw_pipe.h" @@ -114,178 +114,6 @@ struct pstip_stage }; - -/** - * Subclass of tgsi_transform_context, used for transforming the - * user's fragment shader to add the extra texture sample and fragment kill - * instructions. - */ -struct pstip_transform_context { - struct tgsi_transform_context base; - uint tempsUsed; /**< bitmask */ - int wincoordInput; - int maxInput; - uint samplersUsed; /**< bitfield of samplers used */ - bool hasSview; - int freeSampler; /** an available sampler for the pstipple */ - int texTemp; /**< temp registers */ - int numImmed; -}; - - -/** - * TGSI declaration transform callback. - * Look for a free sampler, a free input attrib, and two free temp regs. 
- */ -static void -pstip_transform_decl(struct tgsi_transform_context *ctx, - struct tgsi_full_declaration *decl) -{ - struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; - - if (decl->Declaration.File == TGSI_FILE_SAMPLER) { - uint i; - for (i = decl->Range.First; - i <= decl->Range.Last; i++) { - pctx->samplersUsed |= 1 << i; - } - } - else if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { - pctx->hasSview = true; - } - else if (decl->Declaration.File == TGSI_FILE_INPUT) { - pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last); - if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) - pctx->wincoordInput = (int) decl->Range.First; - } - else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { - uint i; - for (i = decl->Range.First; - i <= decl->Range.Last; i++) { - pctx->tempsUsed |= (1 << i); - } - } - - ctx->emit_declaration(ctx, decl); -} - - -/** - * TGSI immediate declaration transform callback. - * We're just counting the number of immediates here. - */ -static void -pstip_transform_immed(struct tgsi_transform_context *ctx, - struct tgsi_full_immediate *immed) -{ - struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; - ctx->emit_immediate(ctx, immed); /* emit to output shader */ - pctx->numImmed++; -} - - -/** - * Find the lowest zero bit in the given word, or -1 if bitfield is all ones. - */ -static int -free_bit(uint bitfield) -{ - return ffs(~bitfield) - 1; -} - - -/** - * TGSI transform prolog callback. 
- */ -static void -pstip_transform_prolog(struct tgsi_transform_context *ctx) -{ - struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; - uint i; - int wincoordInput; - - /* find free sampler */ - pctx->freeSampler = free_bit(pctx->samplersUsed); - if (pctx->freeSampler >= PIPE_MAX_SAMPLERS) - pctx->freeSampler = PIPE_MAX_SAMPLERS - 1; - - if (pctx->wincoordInput < 0) - wincoordInput = pctx->maxInput + 1; - else - wincoordInput = pctx->wincoordInput; - - /* find one free temp reg */ - for (i = 0; i < 32; i++) { - if ((pctx->tempsUsed & (1 << i)) == 0) { - /* found a free temp */ - if (pctx->texTemp < 0) - pctx->texTemp = i; - else - break; - } - } - assert(pctx->texTemp >= 0); - - if (pctx->wincoordInput < 0) { - /* declare new position input reg */ - tgsi_transform_input_decl(ctx, wincoordInput, - TGSI_SEMANTIC_POSITION, 1, - TGSI_INTERPOLATE_LINEAR); - } - - /* declare new sampler */ - tgsi_transform_sampler_decl(ctx, pctx->freeSampler); - - /* if the src shader has SVIEW decl's for each SAMP decl, we - * need to continue the trend and ensure there is a matching - * SVIEW for the new SAMP we just created - */ - if (pctx->hasSview) { - tgsi_transform_sampler_view_decl(ctx, - pctx->freeSampler, - TGSI_TEXTURE_2D, - TGSI_RETURN_TYPE_FLOAT); - } - - /* declare new temp regs */ - tgsi_transform_temp_decl(ctx, pctx->texTemp); - - /* emit immediate = {1/32, 1/32, 1, 1} - * The index/position of this immediate will be pctx->numImmed - */ - tgsi_transform_immediate_decl(ctx, 1.0/32.0, 1.0/32.0, 1.0, 1.0); - - /* - * Insert new MUL/TEX/KILL_IF instructions at start of program - * Take gl_FragCoord, divide by 32 (stipple size), sample the - * texture and kill fragment if needed. - * - * We'd like to use non-normalized texcoords to index into a RECT - * texture, but we can only use GL_REPEAT wrap mode with normalized - * texcoords. Darn. 
- */ - - /* MUL texTemp, INPUT[wincoord], 1/32; */ - tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL, - TGSI_FILE_TEMPORARY, pctx->texTemp, - TGSI_WRITEMASK_XYZW, - TGSI_FILE_INPUT, wincoordInput, - TGSI_FILE_IMMEDIATE, pctx->numImmed); - - /* TEX texTemp, texTemp, sampler; */ - tgsi_transform_tex_2d_inst(ctx, - TGSI_FILE_TEMPORARY, pctx->texTemp, - TGSI_FILE_TEMPORARY, pctx->texTemp, - pctx->freeSampler); - - /* KILL_IF -texTemp.wwww; # if -texTemp < 0, KILL fragment */ - tgsi_transform_kill_inst(ctx, - TGSI_FILE_TEMPORARY, pctx->texTemp, - TGSI_SWIZZLE_W, TRUE); -} - - - /** * Generate the frag shader we'll use for doing polygon stipple. * This will be the user's shader prefixed with a TEX and KIL instruction. @@ -293,40 +121,27 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx) static boolean generate_pstip_fs(struct pstip_stage *pstip) { + struct pipe_context *pipe = pstip->pipe; + struct pipe_screen *screen = pipe->screen; const struct pipe_shader_state *orig_fs = &pstip->fs->state; /*struct draw_context *draw = pstip->stage.draw;*/ struct pipe_shader_state pstip_fs; - struct pstip_transform_context transform; - const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS; + enum tgsi_file_type wincoord_file; + + wincoord_file = screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL) ? 
+ TGSI_FILE_SYSTEM_VALUE : TGSI_FILE_INPUT; pstip_fs = *orig_fs; /* copy to init */ - pstip_fs.tokens = tgsi_alloc_tokens(newLen); + pstip_fs.tokens = util_pstipple_create_fragment_shader(orig_fs->tokens, + &pstip->fs->sampler_unit, + 0, + wincoord_file); if (pstip_fs.tokens == NULL) return FALSE; - memset(&transform, 0, sizeof(transform)); - transform.wincoordInput = -1; - transform.maxInput = -1; - transform.texTemp = -1; - transform.base.prolog = pstip_transform_prolog; - transform.base.transform_declaration = pstip_transform_decl; - transform.base.transform_immediate = pstip_transform_immed; - - tgsi_transform_shader(orig_fs->tokens, - (struct tgsi_token *) pstip_fs.tokens, - newLen, &transform.base); - -#if 0 /* DEBUG */ - tgsi_dump(orig_fs->tokens, 0); - tgsi_dump(pstip_fs.tokens, 0); -#endif - - assert(pstip->fs); - - pstip->fs->sampler_unit = transform.freeSampler; assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS); - pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); + pstip->fs->pstip_fs = pstip->driver_create_fs_state(pipe, &pstip_fs); FREE((void *)pstip_fs.tokens); @@ -338,113 +153,6 @@ generate_pstip_fs(struct pstip_stage *pstip) /** - * Load texture image with current stipple pattern. - */ -static void -pstip_update_texture(struct pstip_stage *pstip) -{ - static const uint bit31 = 1 << 31; - struct pipe_context *pipe = pstip->pipe; - struct pipe_transfer *transfer; - const uint *stipple = pstip->state.stipple->stipple; - uint i, j; - ubyte *data; - - data = pipe_transfer_map(pipe, pstip->texture, 0, 0, - PIPE_TRANSFER_WRITE, 0, 0, 32, 32, &transfer); - - /* - * Load alpha texture. - * Note: 0 means keep the fragment, 255 means kill it. - * We'll negate the texel value and use KILL_IF which kills if value - * is negative. 
- */ - for (i = 0; i < 32; i++) { - for (j = 0; j < 32; j++) { - if (stipple[i] & (bit31 >> j)) { - /* fragment "on" */ - data[i * transfer->stride + j] = 0; - } - else { - /* fragment "off" */ - data[i * transfer->stride + j] = 255; - } - } - } - - /* unmap */ - pipe_transfer_unmap(pipe, transfer); -} - - -/** - * Create the texture map we'll use for stippling. - */ -static boolean -pstip_create_texture(struct pstip_stage *pstip) -{ - struct pipe_context *pipe = pstip->pipe; - struct pipe_screen *screen = pipe->screen; - struct pipe_resource texTemp; - struct pipe_sampler_view viewTempl; - - memset(&texTemp, 0, sizeof(texTemp)); - texTemp.target = PIPE_TEXTURE_2D; - texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ - texTemp.last_level = 0; - texTemp.width0 = 32; - texTemp.height0 = 32; - texTemp.depth0 = 1; - texTemp.array_size = 1; - texTemp.bind = PIPE_BIND_SAMPLER_VIEW; - - pstip->texture = screen->resource_create(screen, &texTemp); - if (pstip->texture == NULL) - return FALSE; - - u_sampler_view_default_template(&viewTempl, - pstip->texture, - pstip->texture->format); - pstip->sampler_view = pipe->create_sampler_view(pipe, - pstip->texture, - &viewTempl); - if (!pstip->sampler_view) { - return FALSE; - } - - return TRUE; -} - - -/** - * Create the sampler CSO that'll be used for stippling. 
- */ -static boolean -pstip_create_sampler(struct pstip_stage *pstip) -{ - struct pipe_sampler_state sampler; - struct pipe_context *pipe = pstip->pipe; - - memset(&sampler, 0, sizeof(sampler)); - sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; - sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; - sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.normalized_coords = 1; - sampler.min_lod = 0.0f; - sampler.max_lod = 0.0f; - - pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler); - if (pstip->sampler_cso == NULL) - return FALSE; - - return TRUE; -} - - -/** * When we're about to draw our first stipple polygon in a batch, this function * is called to tell the driver to bind our modified fragment shader. */ @@ -722,7 +430,8 @@ pstip_set_polygon_stipple(struct pipe_context *pipe, /* pass-through */ pstip->driver_set_polygon_stipple(pstip->pipe, stipple); - pstip_update_texture(pstip); + util_pstipple_update_stipple_texture(pstip->pipe, pstip->texture, + pstip->state.stipple->stipple); } @@ -758,10 +467,17 @@ draw_install_pstipple_stage(struct draw_context *draw, pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; /* create special texture, sampler state */ - if (!pstip_create_texture(pstip)) + pstip->texture = util_pstipple_create_stipple_texture(pipe, NULL); + if (!pstip->texture) + goto fail; + + pstip->sampler_view = util_pstipple_create_sampler_view(pipe, + pstip->texture); + if (!pstip->sampler_view) goto fail; - if (!pstip_create_sampler(pstip)) + pstip->sampler_cso = util_pstipple_create_sampler(pipe); + if (!pstip->sampler_cso) goto fail; /* override the driver's functions */ diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 3e7d69f73ed..61ff0a74379 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -614,8 
+614,8 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, } nir_ssa_def *offset; - if (dim) { - /* UBO loads don't have a const_index[0] base offset. */ + if (op == nir_intrinsic_load_ubo) { + /* UBO loads don't have a base offset. */ offset = nir_imm_int(b, index); if (indirect) { offset = nir_iadd(b, offset, ttn_src_for_indirect(c, indirect)); @@ -623,7 +623,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, /* UBO offsets are in bytes, but TGSI gives them to us in vec4's */ offset = nir_ishl(b, offset, nir_imm_int(b, 4)); } else { - load->const_index[0] = index; + nir_intrinsic_set_base(load, index); if (indirect) { offset = ttn_src_for_indirect(c, indirect); } else { @@ -1875,7 +1875,7 @@ ttn_emit_instruction(struct ttn_compile *c) &tgsi_dst->Indirect : NULL; store->num_components = 4; - store->const_index[0] = dest.write_mask; + nir_intrinsic_set_write_mask(store, dest.write_mask); store->variables[0] = ttn_array_deref(c, store, var, offset, indirect); store->src[0] = nir_src_for_reg(dest.dest.reg.reg); @@ -1907,8 +1907,8 @@ ttn_add_output_stores(struct ttn_compile *c) store->num_components = 4; store->src[0].reg.reg = c->output_regs[loc].reg; store->src[0].reg.base_offset = c->output_regs[loc].offset; - store->const_index[0] = loc; - store->const_index[1] = 0xf; /* writemask */ + nir_intrinsic_set_base(store, loc); + nir_intrinsic_set_write_mask(store, 0xf); store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_builder_instr_insert(b, &store->instr); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 687fb54830d..489423d7f12 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -44,6 +44,387 @@ +static void +scan_instruction(struct tgsi_shader_info *info, + const struct tgsi_full_instruction *fullinst, + unsigned *current_depth) +{ + unsigned i; + + assert(fullinst->Instruction.Opcode < 
TGSI_OPCODE_LAST); + info->opcode_count[fullinst->Instruction.Opcode]++; + + switch (fullinst->Instruction.Opcode) { + case TGSI_OPCODE_IF: + case TGSI_OPCODE_UIF: + case TGSI_OPCODE_BGNLOOP: + (*current_depth)++; + info->max_depth = MAX2(info->max_depth, *current_depth); + break; + case TGSI_OPCODE_ENDIF: + case TGSI_OPCODE_ENDLOOP: + (*current_depth)--; + break; + default: + break; + } + + if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID || + fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || + fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { + const struct tgsi_full_src_register *src0 = &fullinst->Src[0]; + unsigned input; + + if (src0->Register.Indirect && src0->Indirect.ArrayID) + input = info->input_array_first[src0->Indirect.ArrayID]; + else + input = src0->Register.Index; + + /* For the INTERP opcodes, the interpolation is always + * PERSPECTIVE unless LINEAR is specified. + */ + switch (info->input_interpolate[input]) { + case TGSI_INTERPOLATE_COLOR: + case TGSI_INTERPOLATE_CONSTANT: + case TGSI_INTERPOLATE_PERSPECTIVE: + switch (fullinst->Instruction.Opcode) { + case TGSI_OPCODE_INTERP_CENTROID: + info->uses_persp_opcode_interp_centroid = TRUE; + break; + case TGSI_OPCODE_INTERP_OFFSET: + info->uses_persp_opcode_interp_offset = TRUE; + break; + case TGSI_OPCODE_INTERP_SAMPLE: + info->uses_persp_opcode_interp_sample = TRUE; + break; + } + break; + + case TGSI_INTERPOLATE_LINEAR: + switch (fullinst->Instruction.Opcode) { + case TGSI_OPCODE_INTERP_CENTROID: + info->uses_linear_opcode_interp_centroid = TRUE; + break; + case TGSI_OPCODE_INTERP_OFFSET: + info->uses_linear_opcode_interp_offset = TRUE; + break; + case TGSI_OPCODE_INTERP_SAMPLE: + info->uses_linear_opcode_interp_sample = TRUE; + break; + } + break; + } + } + + if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D && + fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG) + info->uses_doubles = TRUE; + + for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { + const 
struct tgsi_full_src_register *src = &fullinst->Src[i]; + int ind = src->Register.Index; + + /* Mark which inputs are effectively used */ + if (src->Register.File == TGSI_FILE_INPUT) { + unsigned usage_mask; + usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i); + if (src->Register.Indirect) { + for (ind = 0; ind < info->num_inputs; ++ind) { + info->input_usage_mask[ind] |= usage_mask; + } + } else { + assert(ind >= 0); + assert(ind < PIPE_MAX_SHADER_INPUTS); + info->input_usage_mask[ind] |= usage_mask; + } + + if (info->processor == TGSI_PROCESSOR_FRAGMENT && + !src->Register.Indirect) { + unsigned name = + info->input_semantic_name[src->Register.Index]; + unsigned index = + info->input_semantic_index[src->Register.Index]; + + if (name == TGSI_SEMANTIC_POSITION && + (src->Register.SwizzleX == TGSI_SWIZZLE_Z || + src->Register.SwizzleY == TGSI_SWIZZLE_Z || + src->Register.SwizzleZ == TGSI_SWIZZLE_Z || + src->Register.SwizzleW == TGSI_SWIZZLE_Z)) + info->reads_z = TRUE; + + if (name == TGSI_SEMANTIC_COLOR) { + unsigned mask = + (1 << src->Register.SwizzleX) | + (1 << src->Register.SwizzleY) | + (1 << src->Register.SwizzleZ) | + (1 << src->Register.SwizzleW); + + info->colors_read |= mask << (index * 4); + } + } + } + + /* check for indirect register reads */ + if (src->Register.Indirect) { + info->indirect_files |= (1 << src->Register.File); + info->indirect_files_read |= (1 << src->Register.File); + } + + /* MSAA samplers */ + if (src->Register.File == TGSI_FILE_SAMPLER) { + assert(fullinst->Instruction.Texture); + assert(src->Register.Index < Elements(info->is_msaa_sampler)); + + if (fullinst->Instruction.Texture && + (fullinst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || + fullinst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) { + info->is_msaa_sampler[src->Register.Index] = TRUE; + } + } + } + + /* check for indirect register writes */ + for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) { + const struct tgsi_full_dst_register *dst = &fullinst->Dst[i]; 
+ if (dst->Register.Indirect) { + info->indirect_files |= (1 << dst->Register.File); + info->indirect_files_written |= (1 << dst->Register.File); + } + } + + info->num_instructions++; +} + + +static void +scan_declaration(struct tgsi_shader_info *info, + const struct tgsi_full_declaration *fulldecl) +{ + const uint file = fulldecl->Declaration.File; + const unsigned procType = info->processor; + uint reg; + + if (fulldecl->Declaration.Array) { + unsigned array_id = fulldecl->Array.ArrayID; + + switch (file) { + case TGSI_FILE_INPUT: + assert(array_id < ARRAY_SIZE(info->input_array_first)); + info->input_array_first[array_id] = fulldecl->Range.First; + info->input_array_last[array_id] = fulldecl->Range.Last; + break; + case TGSI_FILE_OUTPUT: + assert(array_id < ARRAY_SIZE(info->output_array_first)); + info->output_array_first[array_id] = fulldecl->Range.First; + info->output_array_last[array_id] = fulldecl->Range.Last; + break; + } + info->array_max[file] = MAX2(info->array_max[file], array_id); + } + + for (reg = fulldecl->Range.First; reg <= fulldecl->Range.Last; reg++) { + unsigned semName = fulldecl->Semantic.Name; + unsigned semIndex = fulldecl->Semantic.Index + + (reg - fulldecl->Range.First); + + /* only first 32 regs will appear in this bitfield */ + info->file_mask[file] |= (1 << reg); + info->file_count[file]++; + info->file_max[file] = MAX2(info->file_max[file], (int)reg); + + if (file == TGSI_FILE_CONSTANT) { + int buffer = 0; + + if (fulldecl->Declaration.Dimension) + buffer = fulldecl->Dim.Index2D; + + info->const_file_max[buffer] = + MAX2(info->const_file_max[buffer], (int)reg); + } + else if (file == TGSI_FILE_INPUT) { + info->input_semantic_name[reg] = (ubyte) semName; + info->input_semantic_index[reg] = (ubyte) semIndex; + info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate; + info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location; + info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap; + 
info->num_inputs++; + + /* Only interpolated varyings. Don't include POSITION. + * Don't include integer varyings, because they are not + * interpolated. + */ + if (semName == TGSI_SEMANTIC_GENERIC || + semName == TGSI_SEMANTIC_TEXCOORD || + semName == TGSI_SEMANTIC_COLOR || + semName == TGSI_SEMANTIC_BCOLOR || + semName == TGSI_SEMANTIC_FOG || + semName == TGSI_SEMANTIC_CLIPDIST || + semName == TGSI_SEMANTIC_CULLDIST) { + switch (fulldecl->Interp.Interpolate) { + case TGSI_INTERPOLATE_COLOR: + case TGSI_INTERPOLATE_PERSPECTIVE: + switch (fulldecl->Interp.Location) { + case TGSI_INTERPOLATE_LOC_CENTER: + info->uses_persp_center = TRUE; + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + info->uses_persp_centroid = TRUE; + break; + case TGSI_INTERPOLATE_LOC_SAMPLE: + info->uses_persp_sample = TRUE; + break; + } + break; + case TGSI_INTERPOLATE_LINEAR: + switch (fulldecl->Interp.Location) { + case TGSI_INTERPOLATE_LOC_CENTER: + info->uses_linear_center = TRUE; + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + info->uses_linear_centroid = TRUE; + break; + case TGSI_INTERPOLATE_LOC_SAMPLE: + info->uses_linear_sample = TRUE; + break; + } + break; + /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. 
*/ + } + } + + if (semName == TGSI_SEMANTIC_PRIMID) + info->uses_primid = TRUE; + else if (procType == TGSI_PROCESSOR_FRAGMENT) { + if (semName == TGSI_SEMANTIC_POSITION) + info->reads_position = TRUE; + else if (semName == TGSI_SEMANTIC_FACE) + info->uses_frontface = TRUE; + } + } + else if (file == TGSI_FILE_SYSTEM_VALUE) { + unsigned index = fulldecl->Range.First; + + info->system_value_semantic_name[index] = semName; + info->num_system_values = MAX2(info->num_system_values, index + 1); + + switch (semName) { + case TGSI_SEMANTIC_INSTANCEID: + info->uses_instanceid = TRUE; + break; + case TGSI_SEMANTIC_VERTEXID: + info->uses_vertexid = TRUE; + break; + case TGSI_SEMANTIC_VERTEXID_NOBASE: + info->uses_vertexid_nobase = TRUE; + break; + case TGSI_SEMANTIC_BASEVERTEX: + info->uses_basevertex = TRUE; + break; + case TGSI_SEMANTIC_PRIMID: + info->uses_primid = TRUE; + break; + case TGSI_SEMANTIC_INVOCATIONID: + info->uses_invocationid = TRUE; + break; + case TGSI_SEMANTIC_POSITION: + info->reads_position = TRUE; + break; + case TGSI_SEMANTIC_FACE: + info->uses_frontface = TRUE; + break; + case TGSI_SEMANTIC_SAMPLEMASK: + info->reads_samplemask = TRUE; + break; + } + } + else if (file == TGSI_FILE_OUTPUT) { + info->output_semantic_name[reg] = (ubyte) semName; + info->output_semantic_index[reg] = (ubyte) semIndex; + info->num_outputs++; + + if (semName == TGSI_SEMANTIC_COLOR) + info->colors_written |= 1 << semIndex; + + if (procType == TGSI_PROCESSOR_VERTEX || + procType == TGSI_PROCESSOR_GEOMETRY || + procType == TGSI_PROCESSOR_TESS_CTRL || + procType == TGSI_PROCESSOR_TESS_EVAL) { + switch (semName) { + case TGSI_SEMANTIC_VIEWPORT_INDEX: + info->writes_viewport_index = TRUE; + break; + case TGSI_SEMANTIC_LAYER: + info->writes_layer = TRUE; + break; + case TGSI_SEMANTIC_PSIZE: + info->writes_psize = TRUE; + break; + case TGSI_SEMANTIC_CLIPVERTEX: + info->writes_clipvertex = TRUE; + break; + } + } + + if (procType == TGSI_PROCESSOR_FRAGMENT) { + switch (semName) { + 
case TGSI_SEMANTIC_POSITION: + info->writes_z = TRUE; + break; + case TGSI_SEMANTIC_STENCIL: + info->writes_stencil = TRUE; + break; + case TGSI_SEMANTIC_SAMPLEMASK: + info->writes_samplemask = TRUE; + break; + } + } + + if (procType == TGSI_PROCESSOR_VERTEX) { + if (semName == TGSI_SEMANTIC_EDGEFLAG) { + info->writes_edgeflag = TRUE; + } + } + } else if (file == TGSI_FILE_SAMPLER) { + info->samplers_declared |= 1 << reg; + } + } +} + + +static void +scan_immediate(struct tgsi_shader_info *info) +{ + uint reg = info->immediate_count++; + uint file = TGSI_FILE_IMMEDIATE; + + info->file_mask[file] |= (1 << reg); + info->file_count[file]++; + info->file_max[file] = MAX2(info->file_max[file], (int)reg); +} + + +static void +scan_property(struct tgsi_shader_info *info, + const struct tgsi_full_property *fullprop) +{ + unsigned name = fullprop->Property.PropertyName; + unsigned value = fullprop->u[0].Data; + + assert(name < Elements(info->properties)); + info->properties[name] = value; + + switch (name) { + case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: + info->num_written_clipdistance = value; + info->clipdist_writemask |= (1 << value) - 1; + break; + case TGSI_PROPERTY_NUM_CULLDIST_ENABLED: + info->num_written_culldistance = value; + info->culldist_writemask |= (1 << value) - 1; + break; + } +} + /** * Scan the given TGSI shader to collect information such as number of @@ -81,390 +462,30 @@ tgsi_scan_shader(const struct tgsi_token *tokens, procType == TGSI_PROCESSOR_COMPUTE); info->processor = procType; - /** ** Loop over incoming program tokens/instructions */ - while( !tgsi_parse_end_of_tokens( &parse ) ) { - + while (!tgsi_parse_end_of_tokens(&parse)) { info->num_tokens++; tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *fullinst - = &parse.FullToken.FullInstruction; - uint i; - - assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); - 
info->opcode_count[fullinst->Instruction.Opcode]++; - - switch (fullinst->Instruction.Opcode) { - case TGSI_OPCODE_IF: - case TGSI_OPCODE_UIF: - case TGSI_OPCODE_BGNLOOP: - current_depth++; - info->max_depth = MAX2(info->max_depth, current_depth); - break; - case TGSI_OPCODE_ENDIF: - case TGSI_OPCODE_ENDLOOP: - current_depth--; - break; - default: - break; - } - - if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID || - fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || - fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { - const struct tgsi_full_src_register *src0 = &fullinst->Src[0]; - unsigned input; - - if (src0->Register.Indirect && src0->Indirect.ArrayID) - input = info->input_array_first[src0->Indirect.ArrayID]; - else - input = src0->Register.Index; - - /* For the INTERP opcodes, the interpolation is always - * PERSPECTIVE unless LINEAR is specified. - */ - switch (info->input_interpolate[input]) { - case TGSI_INTERPOLATE_COLOR: - case TGSI_INTERPOLATE_CONSTANT: - case TGSI_INTERPOLATE_PERSPECTIVE: - switch (fullinst->Instruction.Opcode) { - case TGSI_OPCODE_INTERP_CENTROID: - info->uses_persp_opcode_interp_centroid = true; - break; - case TGSI_OPCODE_INTERP_OFFSET: - info->uses_persp_opcode_interp_offset = true; - break; - case TGSI_OPCODE_INTERP_SAMPLE: - info->uses_persp_opcode_interp_sample = true; - break; - } - break; - - case TGSI_INTERPOLATE_LINEAR: - switch (fullinst->Instruction.Opcode) { - case TGSI_OPCODE_INTERP_CENTROID: - info->uses_linear_opcode_interp_centroid = true; - break; - case TGSI_OPCODE_INTERP_OFFSET: - info->uses_linear_opcode_interp_offset = true; - break; - case TGSI_OPCODE_INTERP_SAMPLE: - info->uses_linear_opcode_interp_sample = true; - break; - } - break; - } - } - - if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D && - fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG) - info->uses_doubles = true; - - for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { - const struct 
tgsi_full_src_register *src = - &fullinst->Src[i]; - int ind = src->Register.Index; - - /* Mark which inputs are effectively used */ - if (src->Register.File == TGSI_FILE_INPUT) { - unsigned usage_mask; - usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i); - if (src->Register.Indirect) { - for (ind = 0; ind < info->num_inputs; ++ind) { - info->input_usage_mask[ind] |= usage_mask; - } - } else { - assert(ind >= 0); - assert(ind < PIPE_MAX_SHADER_INPUTS); - info->input_usage_mask[ind] |= usage_mask; - } - - if (procType == TGSI_PROCESSOR_FRAGMENT && - !src->Register.Indirect) { - unsigned name = - info->input_semantic_name[src->Register.Index]; - unsigned index = - info->input_semantic_index[src->Register.Index]; - - if (name == TGSI_SEMANTIC_POSITION && - (src->Register.SwizzleX == TGSI_SWIZZLE_Z || - src->Register.SwizzleY == TGSI_SWIZZLE_Z || - src->Register.SwizzleZ == TGSI_SWIZZLE_Z || - src->Register.SwizzleW == TGSI_SWIZZLE_Z)) - info->reads_z = TRUE; - - if (name == TGSI_SEMANTIC_COLOR) { - unsigned mask = - (1 << src->Register.SwizzleX) | - (1 << src->Register.SwizzleY) | - (1 << src->Register.SwizzleZ) | - (1 << src->Register.SwizzleW); - - info->colors_read |= mask << (index * 4); - } - } - } - - /* check for indirect register reads */ - if (src->Register.Indirect) { - info->indirect_files |= (1 << src->Register.File); - info->indirect_files_read |= (1 << src->Register.File); - } - - /* MSAA samplers */ - if (src->Register.File == TGSI_FILE_SAMPLER) { - assert(fullinst->Instruction.Texture); - assert(src->Register.Index < Elements(info->is_msaa_sampler)); - - if (fullinst->Instruction.Texture && - (fullinst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || - fullinst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) { - info->is_msaa_sampler[src->Register.Index] = TRUE; - } - } - } - - /* check for indirect register writes */ - for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *dst = &fullinst->Dst[i]; - if 
(dst->Register.Indirect) { - info->indirect_files |= (1 << dst->Register.File); - info->indirect_files_written |= (1 << dst->Register.File); - } - } - - info->num_instructions++; - } + scan_instruction(info, &parse.FullToken.FullInstruction, + &current_depth); break; - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fulldecl - = &parse.FullToken.FullDeclaration; - const uint file = fulldecl->Declaration.File; - uint reg; - - if (fulldecl->Declaration.Array) { - unsigned array_id = fulldecl->Array.ArrayID; - - switch (file) { - case TGSI_FILE_INPUT: - assert(array_id < ARRAY_SIZE(info->input_array_first)); - info->input_array_first[array_id] = fulldecl->Range.First; - info->input_array_last[array_id] = fulldecl->Range.Last; - break; - case TGSI_FILE_OUTPUT: - assert(array_id < ARRAY_SIZE(info->output_array_first)); - info->output_array_first[array_id] = fulldecl->Range.First; - info->output_array_last[array_id] = fulldecl->Range.Last; - break; - } - info->array_max[file] = MAX2(info->array_max[file], array_id); - } - - for (reg = fulldecl->Range.First; - reg <= fulldecl->Range.Last; - reg++) { - unsigned semName = fulldecl->Semantic.Name; - unsigned semIndex = - fulldecl->Semantic.Index + (reg - fulldecl->Range.First); - - /* only first 32 regs will appear in this bitfield */ - info->file_mask[file] |= (1 << reg); - info->file_count[file]++; - info->file_max[file] = MAX2(info->file_max[file], (int)reg); - - if (file == TGSI_FILE_CONSTANT) { - int buffer = 0; - - if (fulldecl->Declaration.Dimension) - buffer = fulldecl->Dim.Index2D; - - info->const_file_max[buffer] = - MAX2(info->const_file_max[buffer], (int)reg); - } - else if (file == TGSI_FILE_INPUT) { - info->input_semantic_name[reg] = (ubyte) semName; - info->input_semantic_index[reg] = (ubyte) semIndex; - info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate; - info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location; - info->input_cylindrical_wrap[reg] = 
(ubyte)fulldecl->Interp.CylindricalWrap; - info->num_inputs++; - - /* Only interpolated varyings. Don't include POSITION. - * Don't include integer varyings, because they are not - * interpolated. - */ - if (semName == TGSI_SEMANTIC_GENERIC || - semName == TGSI_SEMANTIC_TEXCOORD || - semName == TGSI_SEMANTIC_COLOR || - semName == TGSI_SEMANTIC_BCOLOR || - semName == TGSI_SEMANTIC_FOG || - semName == TGSI_SEMANTIC_CLIPDIST || - semName == TGSI_SEMANTIC_CULLDIST) { - switch (fulldecl->Interp.Interpolate) { - case TGSI_INTERPOLATE_COLOR: - case TGSI_INTERPOLATE_PERSPECTIVE: - switch (fulldecl->Interp.Location) { - case TGSI_INTERPOLATE_LOC_CENTER: - info->uses_persp_center = true; - break; - case TGSI_INTERPOLATE_LOC_CENTROID: - info->uses_persp_centroid = true; - break; - case TGSI_INTERPOLATE_LOC_SAMPLE: - info->uses_persp_sample = true; - break; - } - break; - case TGSI_INTERPOLATE_LINEAR: - switch (fulldecl->Interp.Location) { - case TGSI_INTERPOLATE_LOC_CENTER: - info->uses_linear_center = true; - break; - case TGSI_INTERPOLATE_LOC_CENTROID: - info->uses_linear_centroid = true; - break; - case TGSI_INTERPOLATE_LOC_SAMPLE: - info->uses_linear_sample = true; - break; - } - break; - /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. 
*/ - } - } - - if (semName == TGSI_SEMANTIC_PRIMID) - info->uses_primid = TRUE; - else if (procType == TGSI_PROCESSOR_FRAGMENT) { - if (semName == TGSI_SEMANTIC_POSITION) - info->reads_position = TRUE; - else if (semName == TGSI_SEMANTIC_FACE) - info->uses_frontface = TRUE; - } - } - else if (file == TGSI_FILE_SYSTEM_VALUE) { - unsigned index = fulldecl->Range.First; - - info->system_value_semantic_name[index] = semName; - info->num_system_values = MAX2(info->num_system_values, - index + 1); - - if (semName == TGSI_SEMANTIC_INSTANCEID) { - info->uses_instanceid = TRUE; - } - else if (semName == TGSI_SEMANTIC_VERTEXID) { - info->uses_vertexid = TRUE; - } - else if (semName == TGSI_SEMANTIC_VERTEXID_NOBASE) { - info->uses_vertexid_nobase = TRUE; - } - else if (semName == TGSI_SEMANTIC_BASEVERTEX) { - info->uses_basevertex = TRUE; - } - else if (semName == TGSI_SEMANTIC_PRIMID) { - info->uses_primid = TRUE; - } else if (semName == TGSI_SEMANTIC_INVOCATIONID) { - info->uses_invocationid = TRUE; - } else if (semName == TGSI_SEMANTIC_POSITION) - info->reads_position = TRUE; - else if (semName == TGSI_SEMANTIC_FACE) - info->uses_frontface = TRUE; - else if (semName == TGSI_SEMANTIC_SAMPLEMASK) - info->reads_samplemask = TRUE; - } - else if (file == TGSI_FILE_OUTPUT) { - info->output_semantic_name[reg] = (ubyte) semName; - info->output_semantic_index[reg] = (ubyte) semIndex; - info->num_outputs++; - - if (semName == TGSI_SEMANTIC_COLOR) - info->colors_written |= 1 << semIndex; - - if (procType == TGSI_PROCESSOR_VERTEX || - procType == TGSI_PROCESSOR_GEOMETRY || - procType == TGSI_PROCESSOR_TESS_CTRL || - procType == TGSI_PROCESSOR_TESS_EVAL) { - if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) { - info->writes_viewport_index = TRUE; - } - else if (semName == TGSI_SEMANTIC_LAYER) { - info->writes_layer = TRUE; - } - else if (semName == TGSI_SEMANTIC_PSIZE) { - info->writes_psize = TRUE; - } - else if (semName == TGSI_SEMANTIC_CLIPVERTEX) { - info->writes_clipvertex = TRUE; - } 
- } - - if (procType == TGSI_PROCESSOR_FRAGMENT) { - if (semName == TGSI_SEMANTIC_POSITION) { - info->writes_z = TRUE; - } - else if (semName == TGSI_SEMANTIC_STENCIL) { - info->writes_stencil = TRUE; - } else if (semName == TGSI_SEMANTIC_SAMPLEMASK) { - info->writes_samplemask = TRUE; - } - } - - if (procType == TGSI_PROCESSOR_VERTEX) { - if (semName == TGSI_SEMANTIC_EDGEFLAG) { - info->writes_edgeflag = TRUE; - } - } - } else if (file == TGSI_FILE_SAMPLER) { - info->samplers_declared |= 1 << reg; - } - } - } + scan_declaration(info, &parse.FullToken.FullDeclaration); break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - uint reg = info->immediate_count++; - uint file = TGSI_FILE_IMMEDIATE; - - info->file_mask[file] |= (1 << reg); - info->file_count[file]++; - info->file_max[file] = MAX2(info->file_max[file], (int)reg); - } + scan_immediate(info); break; - case TGSI_TOKEN_TYPE_PROPERTY: - { - const struct tgsi_full_property *fullprop - = &parse.FullToken.FullProperty; - unsigned name = fullprop->Property.PropertyName; - unsigned value = fullprop->u[0].Data; - - assert(name < Elements(info->properties)); - info->properties[name] = value; - - switch (name) { - case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: - info->num_written_clipdistance = value; - info->clipdist_writemask |= (1 << value) - 1; - break; - case TGSI_PROPERTY_NUM_CULLDIST_ENABLED: - info->num_written_culldistance = value; - info->culldist_writemask |= (1 << value) - 1; - break; - } - } + scan_property(info, &parse.FullToken.FullProperty); break; - default: - assert( 0 ); + assert(!"Unexpected TGSI token type"); } } @@ -487,7 +508,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, } } - tgsi_parse_free (&parse); + tgsi_parse_free(&parse); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h index 3bd512b6f3e..27e6179c9ee 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.h +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h @@ -192,7 +192,7 @@ 
tgsi_transform_sampler_view_decl(struct tgsi_transform_context *ctx, decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER_VIEW; - decl.Declaration.UsageMask = 0xf; + decl.Declaration.UsageMask = TGSI_WRITEMASK_XYZW; decl.Range.First = decl.Range.Last = index; decl.SamplerView.Resource = target; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index d6811501d16..9654ac52bf2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -1593,7 +1593,7 @@ emit_decl_sampler_view(struct ureg_program *ureg, out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; out[0].decl.NrTokens = 3; out[0].decl.File = TGSI_FILE_SAMPLER_VIEW; - out[0].decl.UsageMask = 0xf; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; out[1].value = 0; out[1].decl_range.First = index; @@ -1621,7 +1621,7 @@ emit_decl_image(struct ureg_program *ureg, out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; out[0].decl.NrTokens = 3; out[0].decl.File = TGSI_FILE_IMAGE; - out[0].decl.UsageMask = 0xf; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; out[1].value = 0; out[1].decl_range.First = index; @@ -1645,7 +1645,7 @@ emit_decl_buffer(struct ureg_program *ureg, out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; out[0].decl.NrTokens = 2; out[0].decl.File = TGSI_FILE_BUFFER; - out[0].decl.UsageMask = 0xf; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; out[0].decl.Atomic = atomic; out[1].value = 0; diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 2b605594a2e..db6635713e5 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -1,9 +1,9 @@ /************************************************************************** - * + * * Copyright 2008 VMware, Inc. * Copyright (c) 2008 VMware, Inc. * All Rights Reserved. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -11,11 +11,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -23,24 +23,22 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ -#include "pipe/p_config.h" +#include "pipe/p_config.h" #include "pipe/p_compiler.h" -#include "util/u_debug.h" -#include "pipe/p_format.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" +#include "util/u_debug.h" +#include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" #include "util/u_format.h" -#include "util/u_memory.h" -#include "util/u_string.h" -#include "util/u_math.h" -#include "util/u_tile.h" +#include "util/u_memory.h" +#include "util/u_string.h" +#include "util/u_math.h" #include "util/u_prim.h" -#include "util/u_surface.h" #include <inttypes.h> #include <stdio.h> @@ -53,14 +51,15 @@ #endif -void _debug_vprintf(const char *format, va_list ap) +void +_debug_vprintf(const char *format, va_list ap) { static char buf[4096] = {'\0'}; #if defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_EMBEDDED) /* We buffer until we find a newline. 
*/ size_t len = strlen(buf); int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap); - if(ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) { + if (ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) { os_log_message(buf); buf[0] = '\0'; } @@ -70,12 +69,12 @@ void _debug_vprintf(const char *format, va_list ap) #endif } + void -_pipe_debug_message( - struct pipe_debug_callback *cb, - unsigned *id, - enum pipe_debug_type type, - const char *fmt, ...) +_pipe_debug_message(struct pipe_debug_callback *cb, + unsigned *id, + enum pipe_debug_type type, + const char *fmt, ...) { va_list args; va_start(args, fmt); @@ -112,9 +111,8 @@ debug_disable_error_message_boxes(void) #ifdef DEBUG -void debug_print_blob( const char *name, - const void *blob, - unsigned size ) +void +debug_print_blob(const char *name, const void *blob, unsigned size) { const unsigned *ublob = (const unsigned *)blob; unsigned i; @@ -147,6 +145,7 @@ debug_get_option_should_print(void) return value; } + const char * debug_get_option(const char *name, const char *dfault) { @@ -157,39 +156,42 @@ debug_get_option(const char *name, const char *dfault) result = dfault; if (debug_get_option_should_print()) - debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)"); - + debug_printf("%s: %s = %s\n", __FUNCTION__, name, + result ? 
result : "(null)"); + return result; } + boolean debug_get_bool_option(const char *name, boolean dfault) { const char *str = os_get_option(name); boolean result; - - if(str == NULL) + + if (str == NULL) result = dfault; - else if(!util_strcmp(str, "n")) + else if (!util_strcmp(str, "n")) result = FALSE; - else if(!util_strcmp(str, "no")) + else if (!util_strcmp(str, "no")) result = FALSE; - else if(!util_strcmp(str, "0")) + else if (!util_strcmp(str, "0")) result = FALSE; - else if(!util_strcmp(str, "f")) + else if (!util_strcmp(str, "f")) result = FALSE; - else if(!util_strcmp(str, "F")) + else if (!util_strcmp(str, "F")) result = FALSE; - else if(!util_strcmp(str, "false")) + else if (!util_strcmp(str, "false")) result = FALSE; - else if(!util_strcmp(str, "FALSE")) + else if (!util_strcmp(str, "FALSE")) result = FALSE; else result = TRUE; if (debug_get_option_should_print()) - debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? "TRUE" : "FALSE"); - + debug_printf("%s: %s = %s\n", __FUNCTION__, name, + result ? "TRUE" : "FALSE"); + return result; } @@ -199,23 +201,23 @@ debug_get_num_option(const char *name, long dfault) { long result; const char *str; - + str = os_get_option(name); - if(!str) + if (!str) result = dfault; else { long sign; char c; c = *str++; - if(c == '-') { + if (c == '-') { sign = -1; c = *str++; - } + } else { sign = 1; } result = 0; - while('0' <= c && c <= '9') { + while ('0' <= c && c <= '9') { result = result*10 + (c - '0'); c = *str++; } @@ -228,7 +230,9 @@ debug_get_num_option(const char *name, long dfault) return result; } -static boolean str_has_option(const char *str, const char *name) + +static boolean +str_has_option(const char *str, const char *name) { /* Empty string. 
*/ if (!*str) { @@ -271,8 +275,9 @@ static boolean str_has_option(const char *str, const char *name) return FALSE; } + uint64_t -debug_get_flags_option(const char *name, +debug_get_flags_option(const char *name, const struct debug_named_value *flags, uint64_t dfault) { @@ -280,9 +285,9 @@ debug_get_flags_option(const char *name, const char *str; const struct debug_named_value *orig = flags; unsigned namealign = 0; - + str = os_get_option(name); - if(!str) + if (!str) result = dfault; else if (!util_strcmp(str, "help")) { result = dfault; @@ -296,7 +301,7 @@ debug_get_flags_option(const char *name, } else { result = 0; - while( flags->name ) { + while (flags->name) { if (str_has_option(str, flags->name)) result |= flags->value; ++flags; @@ -305,7 +310,8 @@ debug_get_flags_option(const char *name, if (debug_get_option_should_print()) { if (str) { - debug_printf("%s: %s = 0x%"PRIx64" (%s)\n", __FUNCTION__, name, result, str); + debug_printf("%s: %s = 0x%"PRIx64" (%s)\n", + __FUNCTION__, name, result, str); } else { debug_printf("%s: %s = 0x%"PRIx64"\n", __FUNCTION__, name, result); } @@ -315,24 +321,24 @@ debug_get_flags_option(const char *name, } -void _debug_assert_fail(const char *expr, - const char *file, - unsigned line, - const char *function) +void +_debug_assert_fail(const char *expr, const char *file, unsigned line, + const char *function) { - _debug_printf("%s:%u:%s: Assertion `%s' failed.\n", file, line, function, expr); + _debug_printf("%s:%u:%s: Assertion `%s' failed.\n", + file, line, function, expr); os_abort(); } const char * -debug_dump_enum(const struct debug_named_value *names, +debug_dump_enum(const struct debug_named_value *names, unsigned long value) { static char rest[64]; - - while(names->name) { - if(names->value == value) + + while (names->name) { + if (names->value == value) return names->name; ++names; } @@ -343,14 +349,14 @@ debug_dump_enum(const struct debug_named_value *names, const char * -debug_dump_enum_noprefix(const struct 
debug_named_value *names, +debug_dump_enum_noprefix(const struct debug_named_value *names, const char *prefix, unsigned long value) { static char rest[64]; - - while(names->name) { - if(names->value == value) { + + while (names->name) { + if (names->value == value) { const char *name = names->name; while (*name == *prefix) { name++; @@ -361,16 +367,13 @@ debug_dump_enum_noprefix(const struct debug_named_value *names, ++names; } - - util_snprintf(rest, sizeof(rest), "0x%08lx", value); return rest; } const char * -debug_dump_flags(const struct debug_named_value *names, - unsigned long value) +debug_dump_flags(const struct debug_named_value *names, unsigned long value) { static char output[4096]; static char rest[256]; @@ -378,8 +381,8 @@ debug_dump_flags(const struct debug_named_value *names, output[0] = '\0'; - while(names->name) { - if((names->value & value) == names->value) { + while (names->name) { + if ((names->value & value) == names->value) { if (!first) util_strncat(output, "|", sizeof(output) - strlen(output) - 1); else @@ -390,27 +393,28 @@ debug_dump_flags(const struct debug_named_value *names, } ++names; } - + if (value) { if (!first) util_strncat(output, "|", sizeof(output) - strlen(output) - 1); else first = 0; - + util_snprintf(rest, sizeof(rest), "0x%08lx", value); util_strncat(output, rest, sizeof(output) - strlen(output) - 1); output[sizeof(output) - 1] = '\0'; } - - if(first) + + if (first) return "0"; - + return output; } #ifdef DEBUG -void debug_print_format(const char *msg, unsigned fmt ) +void +debug_print_format(const char *msg, unsigned fmt ) { debug_printf("%s: %s\n", msg, util_format_name(fmt)); } @@ -447,7 +451,8 @@ u_prim_name(unsigned prim) int fl_indent = 0; const char* fl_function[1024]; -int debug_funclog_enter(const char* f, const int line, const char* file) +int +debug_funclog_enter(const char* f, const int line, const char* file) { int i; @@ -461,14 +466,16 @@ int debug_funclog_enter(const char* f, const int line, const char* file) 
return 0; } -void debug_funclog_exit(const char* f, const int line, const char* file) +void +debug_funclog_exit(const char* f, const int line, const char* file) { --fl_indent; assert(fl_indent >= 0); assert(fl_function[fl_indent] == f); } -void debug_funclog_enter_exit(const char* f, const int line, const char* file) +void +debug_funclog_enter_exit(const char* f, const int line, const char* file) { int i; for (i = 0; i < fl_indent; i++) @@ -481,313 +488,6 @@ void debug_funclog_enter_exit(const char* f, const int line, const char* file) #ifdef DEBUG /** - * Dump an image to .ppm file. - * \param format PIPE_FORMAT_x - * \param cpp bytes per pixel - * \param width width in pixels - * \param height height in pixels - * \param stride row stride in bytes - */ -void debug_dump_image(const char *prefix, - enum pipe_format format, unsigned cpp, - unsigned width, unsigned height, - unsigned stride, - const void *data) -{ - /* write a ppm file */ - char filename[256]; - unsigned char *rgb8; - FILE *f; - - util_snprintf(filename, sizeof(filename), "%s.ppm", prefix); - - rgb8 = MALLOC(height * width * 3); - if (!rgb8) { - return; - } - - util_format_translate( - PIPE_FORMAT_R8G8B8_UNORM, - rgb8, width * 3, - 0, 0, - format, - data, stride, - 0, 0, width, height); - - /* Must be opened in binary mode or DOS line ending causes data - * to be read with one byte offset. - */ - f = fopen(filename, "wb"); - if (f) { - fprintf(f, "P6\n"); - fprintf(f, "# ppm-file created by gallium\n"); - fprintf(f, "%i %i\n", width, height); - fprintf(f, "255\n"); - fwrite(rgb8, 1, height * width * 3, f); - fclose(f); - } - else { - fprintf(stderr, "Can't open %s for writing\n", filename); - } - - FREE(rgb8); -} - -/* FIXME: dump resources, not surfaces... 
*/ -void debug_dump_surface(struct pipe_context *pipe, - const char *prefix, - struct pipe_surface *surface) -{ - struct pipe_resource *texture; - struct pipe_transfer *transfer; - void *data; - - if (!surface) - return; - - /* XXX: this doesn't necessarily work, as the driver may be using - * temporary storage for the surface which hasn't been propagated - * back into the texture. Need to nail down the semantics of views - * and transfers a bit better before we can say if extra work needs - * to be done here: - */ - texture = surface->texture; - - data = pipe_transfer_map(pipe, texture, surface->u.tex.level, - surface->u.tex.first_layer, - PIPE_TRANSFER_READ, - 0, 0, surface->width, surface->height, &transfer); - if (!data) - return; - - debug_dump_image(prefix, - texture->format, - util_format_get_blocksize(texture->format), - util_format_get_nblocksx(texture->format, surface->width), - util_format_get_nblocksy(texture->format, surface->height), - transfer->stride, - data); - - pipe->transfer_unmap(pipe, transfer); -} - - -void debug_dump_texture(struct pipe_context *pipe, - const char *prefix, - struct pipe_resource *texture) -{ - struct pipe_surface *surface, surf_tmpl; - - if (!texture) - return; - - /* XXX for now, just dump image for layer=0, level=0 */ - u_surface_default_template(&surf_tmpl, texture); - surface = pipe->create_surface(pipe, texture, &surf_tmpl); - if (surface) { - debug_dump_surface(pipe, prefix, surface); - pipe->surface_destroy(pipe, surface); - } -} - - -#pragma pack(push,2) -struct bmp_file_header { - uint16_t bfType; - uint32_t bfSize; - uint16_t bfReserved1; - uint16_t bfReserved2; - uint32_t bfOffBits; -}; -#pragma pack(pop) - -struct bmp_info_header { - uint32_t biSize; - int32_t biWidth; - int32_t biHeight; - uint16_t biPlanes; - uint16_t biBitCount; - uint32_t biCompression; - uint32_t biSizeImage; - int32_t biXPelsPerMeter; - int32_t biYPelsPerMeter; - uint32_t biClrUsed; - uint32_t biClrImportant; -}; - -struct bmp_rgb_quad { - 
uint8_t rgbBlue; - uint8_t rgbGreen; - uint8_t rgbRed; - uint8_t rgbAlpha; -}; - -void -debug_dump_surface_bmp(struct pipe_context *pipe, - const char *filename, - struct pipe_surface *surface) -{ - struct pipe_transfer *transfer; - struct pipe_resource *texture = surface->texture; - void *ptr; - - ptr = pipe_transfer_map(pipe, texture, surface->u.tex.level, - surface->u.tex.first_layer, PIPE_TRANSFER_READ, - 0, 0, surface->width, surface->height, &transfer); - - debug_dump_transfer_bmp(pipe, filename, transfer, ptr); - - pipe->transfer_unmap(pipe, transfer); -} - -void -debug_dump_transfer_bmp(struct pipe_context *pipe, - const char *filename, - struct pipe_transfer *transfer, void *ptr) -{ - float *rgba; - - if (!transfer) - goto error1; - - rgba = MALLOC(transfer->box.width * - transfer->box.height * - transfer->box.depth * - 4*sizeof(float)); - if (!rgba) - goto error1; - - pipe_get_tile_rgba(transfer, ptr, 0, 0, - transfer->box.width, transfer->box.height, - rgba); - - debug_dump_float_rgba_bmp(filename, - transfer->box.width, transfer->box.height, - rgba, transfer->box.width); - - FREE(rgba); -error1: - ; -} - -void -debug_dump_float_rgba_bmp(const char *filename, - unsigned width, unsigned height, - float *rgba, unsigned stride) -{ - FILE *stream; - struct bmp_file_header bmfh; - struct bmp_info_header bmih; - unsigned x, y; - - if (!rgba) - goto error1; - - bmfh.bfType = 0x4d42; - bmfh.bfSize = 14 + 40 + height*width*4; - bmfh.bfReserved1 = 0; - bmfh.bfReserved2 = 0; - bmfh.bfOffBits = 14 + 40; - - bmih.biSize = 40; - bmih.biWidth = width; - bmih.biHeight = height; - bmih.biPlanes = 1; - bmih.biBitCount = 32; - bmih.biCompression = 0; - bmih.biSizeImage = height*width*4; - bmih.biXPelsPerMeter = 0; - bmih.biYPelsPerMeter = 0; - bmih.biClrUsed = 0; - bmih.biClrImportant = 0; - - stream = fopen(filename, "wb"); - if (!stream) - goto error1; - - fwrite(&bmfh, 14, 1, stream); - fwrite(&bmih, 40, 1, stream); - - y = height; - while(y--) { - float *ptr = rgba + 
(stride * y * 4); - for(x = 0; x < width; ++x) - { - struct bmp_rgb_quad pixel; - pixel.rgbRed = float_to_ubyte(ptr[x*4 + 0]); - pixel.rgbGreen = float_to_ubyte(ptr[x*4 + 1]); - pixel.rgbBlue = float_to_ubyte(ptr[x*4 + 2]); - pixel.rgbAlpha = float_to_ubyte(ptr[x*4 + 3]); - fwrite(&pixel, 1, 4, stream); - } - } - - fclose(stream); -error1: - ; -} - -void -debug_dump_ubyte_rgba_bmp(const char *filename, - unsigned width, unsigned height, - const ubyte *rgba, unsigned stride) -{ - FILE *stream; - struct bmp_file_header bmfh; - struct bmp_info_header bmih; - unsigned x, y; - - assert(rgba); - if(!rgba) - goto error1; - - bmfh.bfType = 0x4d42; - bmfh.bfSize = 14 + 40 + height*width*4; - bmfh.bfReserved1 = 0; - bmfh.bfReserved2 = 0; - bmfh.bfOffBits = 14 + 40; - - bmih.biSize = 40; - bmih.biWidth = width; - bmih.biHeight = height; - bmih.biPlanes = 1; - bmih.biBitCount = 32; - bmih.biCompression = 0; - bmih.biSizeImage = height*width*4; - bmih.biXPelsPerMeter = 0; - bmih.biYPelsPerMeter = 0; - bmih.biClrUsed = 0; - bmih.biClrImportant = 0; - - stream = fopen(filename, "wb"); - assert(stream); - if(!stream) - goto error1; - - fwrite(&bmfh, 14, 1, stream); - fwrite(&bmih, 40, 1, stream); - - y = height; - while(y--) { - const ubyte *ptr = rgba + (stride * y * 4); - for(x = 0; x < width; ++x) - { - struct bmp_rgb_quad pixel; - pixel.rgbRed = ptr[x*4 + 0]; - pixel.rgbGreen = ptr[x*4 + 1]; - pixel.rgbBlue = ptr[x*4 + 2]; - pixel.rgbAlpha = ptr[x*4 + 3]; - fwrite(&pixel, 1, 4, stream); - } - } - - fclose(stream); -error1: - ; -} - - -/** * Print PIPE_TRANSFER_x flags with a message. 
*/ void diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index 671bd37a085..c2707b402cb 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -464,45 +464,6 @@ void debug_memory_end(unsigned long beginning); -#ifdef DEBUG -struct pipe_context; -struct pipe_surface; -struct pipe_transfer; -struct pipe_resource; - -void debug_dump_image(const char *prefix, - enum pipe_format format, unsigned cpp, - unsigned width, unsigned height, - unsigned stride, - const void *data); -void debug_dump_surface(struct pipe_context *pipe, - const char *prefix, - struct pipe_surface *surface); -void debug_dump_texture(struct pipe_context *pipe, - const char *prefix, - struct pipe_resource *texture); -void debug_dump_surface_bmp(struct pipe_context *pipe, - const char *filename, - struct pipe_surface *surface); -void debug_dump_transfer_bmp(struct pipe_context *pipe, - const char *filename, - struct pipe_transfer *transfer, void *ptr); -void debug_dump_float_rgba_bmp(const char *filename, - unsigned width, unsigned height, - float *rgba, unsigned stride); -void debug_dump_ubyte_rgba_bmp(const char *filename, - unsigned width, unsigned height, - const ubyte *rgba, unsigned stride); -#else -#define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0) -#define debug_dump_surface(pipe, prefix, surface) ((void)0) -#define debug_dump_surface_bmp(pipe, filename, surface) ((void)0) -#define debug_dump_transfer_bmp(filename, transfer, ptr) ((void)0) -#define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) ((void)0) -#define debug_dump_ubyte_rgba_bmp(filename, width, height, rgba, stride) ((void)0) -#endif - - void debug_print_transfer_flags(const char *msg, unsigned usage); diff --git a/src/gallium/auxiliary/util/u_debug_image.c b/src/gallium/auxiliary/util/u_debug_image.c new file mode 100644 index 00000000000..98d73a63de2 --- /dev/null +++ 
b/src/gallium/auxiliary/util/u_debug_image.c @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2008-2016 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#include "util/u_debug_image.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_string.h" +#include "util/u_surface.h" +#include "util/u_tile.h" + +#include <stdio.h> + + +#ifdef DEBUG + +/** + * Dump an image to .ppm file. 
+ * \param format PIPE_FORMAT_x + * \param cpp bytes per pixel + * \param width width in pixels + * \param height height in pixels + * \param stride row stride in bytes + */ +void +debug_dump_image(const char *prefix, + enum pipe_format format, unsigned cpp, + unsigned width, unsigned height, + unsigned stride, + const void *data) +{ + /* write a ppm file */ + char filename[256]; + unsigned char *rgb8; + FILE *f; + + util_snprintf(filename, sizeof(filename), "%s.ppm", prefix); + + rgb8 = MALLOC(height * width * 3); + if (!rgb8) { + return; + } + + util_format_translate( + PIPE_FORMAT_R8G8B8_UNORM, + rgb8, width * 3, + 0, 0, + format, + data, stride, + 0, 0, width, height); + + /* Must be opened in binary mode or DOS line ending causes data + * to be read with one byte offset. + */ + f = fopen(filename, "wb"); + if (f) { + fprintf(f, "P6\n"); + fprintf(f, "# ppm-file created by gallium\n"); + fprintf(f, "%i %i\n", width, height); + fprintf(f, "255\n"); + fwrite(rgb8, 1, height * width * 3, f); + fclose(f); + } + else { + fprintf(stderr, "Can't open %s for writing\n", filename); + } + + FREE(rgb8); +} + + +/* FIXME: dump resources, not surfaces... */ +void +debug_dump_surface(struct pipe_context *pipe, + const char *prefix, + struct pipe_surface *surface) +{ + struct pipe_resource *texture; + struct pipe_transfer *transfer; + void *data; + + if (!surface) + return; + + /* XXX: this doesn't necessarily work, as the driver may be using + * temporary storage for the surface which hasn't been propagated + * back into the texture. 
Need to nail down the semantics of views + * and transfers a bit better before we can say if extra work needs + * to be done here: + */ + texture = surface->texture; + + data = pipe_transfer_map(pipe, texture, surface->u.tex.level, + surface->u.tex.first_layer, + PIPE_TRANSFER_READ, + 0, 0, surface->width, surface->height, &transfer); + if (!data) + return; + + debug_dump_image(prefix, + texture->format, + util_format_get_blocksize(texture->format), + util_format_get_nblocksx(texture->format, surface->width), + util_format_get_nblocksy(texture->format, surface->height), + transfer->stride, + data); + + pipe->transfer_unmap(pipe, transfer); +} + + +void +debug_dump_texture(struct pipe_context *pipe, + const char *prefix, + struct pipe_resource *texture) +{ + struct pipe_surface *surface, surf_tmpl; + + if (!texture) + return; + + /* XXX for now, just dump image for layer=0, level=0 */ + u_surface_default_template(&surf_tmpl, texture); + surface = pipe->create_surface(pipe, texture, &surf_tmpl); + if (surface) { + debug_dump_surface(pipe, prefix, surface); + pipe->surface_destroy(pipe, surface); + } +} + + +#pragma pack(push,2) +struct bmp_file_header { + uint16_t bfType; + uint32_t bfSize; + uint16_t bfReserved1; + uint16_t bfReserved2; + uint32_t bfOffBits; +}; +#pragma pack(pop) + +struct bmp_info_header { + uint32_t biSize; + int32_t biWidth; + int32_t biHeight; + uint16_t biPlanes; + uint16_t biBitCount; + uint32_t biCompression; + uint32_t biSizeImage; + int32_t biXPelsPerMeter; + int32_t biYPelsPerMeter; + uint32_t biClrUsed; + uint32_t biClrImportant; +}; + +struct bmp_rgb_quad { + uint8_t rgbBlue; + uint8_t rgbGreen; + uint8_t rgbRed; + uint8_t rgbAlpha; +}; + +void +debug_dump_surface_bmp(struct pipe_context *pipe, + const char *filename, + struct pipe_surface *surface) +{ + struct pipe_transfer *transfer; + struct pipe_resource *texture = surface->texture; + void *ptr; + + ptr = pipe_transfer_map(pipe, texture, surface->u.tex.level, + 
surface->u.tex.first_layer, PIPE_TRANSFER_READ, + 0, 0, surface->width, surface->height, &transfer); + + debug_dump_transfer_bmp(pipe, filename, transfer, ptr); + + pipe->transfer_unmap(pipe, transfer); +} + +void +debug_dump_transfer_bmp(struct pipe_context *pipe, + const char *filename, + struct pipe_transfer *transfer, void *ptr) +{ + float *rgba; + + if (!transfer) + goto error1; + + rgba = MALLOC(transfer->box.width * + transfer->box.height * + transfer->box.depth * + 4*sizeof(float)); + if (!rgba) + goto error1; + + pipe_get_tile_rgba(transfer, ptr, 0, 0, + transfer->box.width, transfer->box.height, + rgba); + + debug_dump_float_rgba_bmp(filename, + transfer->box.width, transfer->box.height, + rgba, transfer->box.width); + + FREE(rgba); +error1: + ; +} + +void +debug_dump_float_rgba_bmp(const char *filename, + unsigned width, unsigned height, + float *rgba, unsigned stride) +{ + FILE *stream; + struct bmp_file_header bmfh; + struct bmp_info_header bmih; + unsigned x, y; + + if (!rgba) + goto error1; + + bmfh.bfType = 0x4d42; + bmfh.bfSize = 14 + 40 + height*width*4; + bmfh.bfReserved1 = 0; + bmfh.bfReserved2 = 0; + bmfh.bfOffBits = 14 + 40; + + bmih.biSize = 40; + bmih.biWidth = width; + bmih.biHeight = height; + bmih.biPlanes = 1; + bmih.biBitCount = 32; + bmih.biCompression = 0; + bmih.biSizeImage = height*width*4; + bmih.biXPelsPerMeter = 0; + bmih.biYPelsPerMeter = 0; + bmih.biClrUsed = 0; + bmih.biClrImportant = 0; + + stream = fopen(filename, "wb"); + if (!stream) + goto error1; + + fwrite(&bmfh, 14, 1, stream); + fwrite(&bmih, 40, 1, stream); + + y = height; + while (y--) { + float *ptr = rgba + (stride * y * 4); + for (x = 0; x < width; ++x) { + struct bmp_rgb_quad pixel; + pixel.rgbRed = float_to_ubyte(ptr[x*4 + 0]); + pixel.rgbGreen = float_to_ubyte(ptr[x*4 + 1]); + pixel.rgbBlue = float_to_ubyte(ptr[x*4 + 2]); + pixel.rgbAlpha = float_to_ubyte(ptr[x*4 + 3]); + fwrite(&pixel, 1, 4, stream); + } + } + + fclose(stream); +error1: + ; +} + +void 
+debug_dump_ubyte_rgba_bmp(const char *filename, + unsigned width, unsigned height, + const ubyte *rgba, unsigned stride) +{ + FILE *stream; + struct bmp_file_header bmfh; + struct bmp_info_header bmih; + unsigned x, y; + + assert(rgba); + if (!rgba) + goto error1; + + bmfh.bfType = 0x4d42; + bmfh.bfSize = 14 + 40 + height*width*4; + bmfh.bfReserved1 = 0; + bmfh.bfReserved2 = 0; + bmfh.bfOffBits = 14 + 40; + + bmih.biSize = 40; + bmih.biWidth = width; + bmih.biHeight = height; + bmih.biPlanes = 1; + bmih.biBitCount = 32; + bmih.biCompression = 0; + bmih.biSizeImage = height*width*4; + bmih.biXPelsPerMeter = 0; + bmih.biYPelsPerMeter = 0; + bmih.biClrUsed = 0; + bmih.biClrImportant = 0; + + stream = fopen(filename, "wb"); + assert(stream); + if (!stream) + goto error1; + + fwrite(&bmfh, 14, 1, stream); + fwrite(&bmih, 40, 1, stream); + + y = height; + while (y--) { + const ubyte *ptr = rgba + (stride * y * 4); + for (x = 0; x < width; ++x) { + struct bmp_rgb_quad pixel; + pixel.rgbRed = ptr[x*4 + 0]; + pixel.rgbGreen = ptr[x*4 + 1]; + pixel.rgbBlue = ptr[x*4 + 2]; + pixel.rgbAlpha = ptr[x*4 + 3]; + fwrite(&pixel, 1, 4, stream); + } + } + + fclose(stream); +error1: + ; +} + +#endif diff --git a/src/gallium/auxiliary/util/u_debug_image.h b/src/gallium/auxiliary/util/u_debug_image.h new file mode 100644 index 00000000000..f190eec5f52 --- /dev/null +++ b/src/gallium/auxiliary/util/u_debug_image.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2008-2016 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + + +#ifndef U_DEBUG_IMAGE_H +#define U_DEBUG_IMAGE_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" + + +#ifdef DEBUG +struct pipe_context; +struct pipe_surface; +struct pipe_transfer; +struct pipe_resource; + +void debug_dump_image(const char *prefix, + enum pipe_format format, unsigned cpp, + unsigned width, unsigned height, + unsigned stride, + const void *data); +void debug_dump_surface(struct pipe_context *pipe, + const char *prefix, + struct pipe_surface *surface); +void debug_dump_texture(struct pipe_context *pipe, + const char *prefix, + struct pipe_resource *texture); +void debug_dump_surface_bmp(struct pipe_context *pipe, + const char *filename, + struct pipe_surface *surface); +void debug_dump_transfer_bmp(struct pipe_context *pipe, + const char *filename, + struct pipe_transfer *transfer, void *ptr); +void debug_dump_float_rgba_bmp(const char *filename, + unsigned width, unsigned height, + float *rgba, unsigned stride); +void debug_dump_ubyte_rgba_bmp(const char *filename, + unsigned width, unsigned height, + const ubyte *rgba, unsigned stride); +#else +#define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0) +#define debug_dump_surface(pipe, prefix, surface) ((void)0) +#define debug_dump_surface_bmp(pipe, filename, surface) ((void)0) +#define debug_dump_transfer_bmp(filename, transfer, ptr) ((void)0) +#define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) ((void)0) +#define debug_dump_ubyte_rgba_bmp(filename, width, height, rgba, stride) ((void)0) +#endif + + +#endif diff --git a/src/gallium/auxiliary/util/u_debug_stack.c b/src/gallium/auxiliary/util/u_debug_stack.c index 68961d3510e..1faa1903a76 100644 --- a/src/gallium/auxiliary/util/u_debug_stack.c +++ b/src/gallium/auxiliary/util/u_debug_stack.c @@ -2,7 +2,7 @@ * * Copyright 2009 VMware, Inc. * All Rights Reserved. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,13 +22,13 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ /** * @file * Stack backtracing. - * + * * @author Jose Fonseca <[email protected]> */ @@ -44,12 +44,13 @@ /** * Capture stack backtrace. * - * NOTE: The implementation of this function is quite big, but it is important not to - * break it down in smaller functions to avoid adding new frames to the calling stack. + * NOTE: The implementation of this function is quite big, but it is important + * not to break it down in smaller functions to avoid adding new frames to the + * calling stack. 
*/ void debug_backtrace_capture(struct debug_stack_frame *backtrace, - unsigned start_frame, + unsigned start_frame, unsigned nr_frames) { const void **frame_pointer = NULL; @@ -66,7 +67,8 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace, */ #if defined(PIPE_OS_WINDOWS) { - typedef USHORT (WINAPI *PFNCAPTURESTACKBACKTRACE)(ULONG, ULONG, PVOID *, PULONG); + typedef USHORT (WINAPI *PFNCAPTURESTACKBACKTRACE)(ULONG, ULONG, + PVOID *, PULONG); static PFNCAPTURESTACKBACKTRACE pfnCaptureStackBackTrace = NULL; if (!pfnCaptureStackBackTrace) { @@ -76,8 +78,9 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace, assert(hModule); } if (hModule) { - pfnCaptureStackBackTrace = (PFNCAPTURESTACKBACKTRACE)GetProcAddress(hModule, - "RtlCaptureStackBackTrace"); + pfnCaptureStackBackTrace = + (PFNCAPTURESTACKBACKTRACE)GetProcAddress(hModule, + "RtlCaptureStackBackTrace"); } } if (pfnCaptureStackBackTrace) { @@ -88,7 +91,8 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace, start_frame += 1; assert(start_frame + nr_frames < 63); - i = pfnCaptureStackBackTrace(start_frame, nr_frames, (PVOID *) &backtrace->function, NULL); + i = pfnCaptureStackBackTrace(start_frame, nr_frames, + (PVOID *) &backtrace->function, NULL); /* Pad remaing requested frames with NULL */ while (i < nr_frames) { @@ -110,50 +114,49 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace, #else frame_pointer = NULL; #endif - - + #ifdef PIPE_ARCH_X86 - while(nr_frames) { + while (nr_frames) { const void **next_frame_pointer; - if(!frame_pointer) + if (!frame_pointer) break; - - if(start_frame) + + if (start_frame) --start_frame; else { backtrace[i++].function = frame_pointer[1]; --nr_frames; } - + next_frame_pointer = (const void **)frame_pointer[0]; - + /* Limit the stack walk to avoid referencing undefined memory */ - if((uintptr_t)next_frame_pointer <= (uintptr_t)frame_pointer || - (uintptr_t)next_frame_pointer > (uintptr_t)frame_pointer + 64*1024) + if 
((uintptr_t)next_frame_pointer <= (uintptr_t)frame_pointer || + (uintptr_t)next_frame_pointer > (uintptr_t)frame_pointer + 64*1024) break; - + frame_pointer = next_frame_pointer; } #else (void) frame_pointer; #endif - while(nr_frames) { + while (nr_frames) { backtrace[i++].function = NULL; --nr_frames; } } - + void -debug_backtrace_dump(const struct debug_stack_frame *backtrace, +debug_backtrace_dump(const struct debug_stack_frame *backtrace, unsigned nr_frames) { unsigned i; - - for(i = 0; i < nr_frames; ++i) { - if(!backtrace[i].function) + + for (i = 0; i < nr_frames; ++i) { + if (!backtrace[i].function) break; debug_symbol_print(backtrace[i].function); } diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c index 3428172203b..74e6f99da67 100644 --- a/src/gallium/auxiliary/util/u_pstipple.c +++ b/src/gallium/auxiliary/util/u_pstipple.c @@ -58,7 +58,7 @@ #define NUM_NEW_TOKENS 53 -static void +void util_pstipple_update_stipple_texture(struct pipe_context *pipe, struct pipe_resource *tex, const uint32_t pattern[32]) @@ -118,7 +118,7 @@ util_pstipple_create_stipple_texture(struct pipe_context *pipe, tex = screen->resource_create(screen, &templat); - if (tex) + if (tex && pattern) util_pstipple_update_stipple_texture(pipe, tex, pattern); return tex; diff --git a/src/gallium/auxiliary/util/u_pstipple.h b/src/gallium/auxiliary/util/u_pstipple.h index ef8396f4318..d1662be2839 100644 --- a/src/gallium/auxiliary/util/u_pstipple.h +++ b/src/gallium/auxiliary/util/u_pstipple.h @@ -36,6 +36,11 @@ struct pipe_resource; struct pipe_shader_state; +extern void +util_pstipple_update_stipple_texture(struct pipe_context *pipe, + struct pipe_resource *tex, + const uint32_t pattern[32]); + extern struct pipe_resource * util_pstipple_create_stipple_texture(struct pipe_context *pipe, const uint32_t pattern[32]); diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c index b569c8f9907..caef2a8245c 100644 --- 
a/src/gallium/auxiliary/util/u_staging.c +++ b/src/gallium/auxiliary/util/u_staging.c @@ -29,11 +29,14 @@ #include "util/u_memory.h" #include "util/u_inlines.h" + static void -util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigned height, unsigned depth, struct pipe_resource *template) +util_staging_resource_template(struct pipe_resource *pt, unsigned width, + unsigned height, unsigned depth, + struct pipe_resource *template) { memset(template, 0, sizeof(struct pipe_resource)); - if(pt->target != PIPE_BUFFER && depth <= 1) + if (pt->target != PIPE_BUFFER && depth <= 1) template->target = PIPE_TEXTURE_RECT; else template->target = pt->target; @@ -49,16 +52,15 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne template->flags = 0; } + struct util_staging_transfer * util_staging_transfer_init(struct pipe_context *pipe, - struct pipe_resource *pt, - unsigned level, - unsigned usage, - const struct pipe_box *box, - boolean direct, struct util_staging_transfer *tx) + struct pipe_resource *pt, + unsigned level, unsigned usage, + const struct pipe_box *box, + boolean direct, struct util_staging_transfer *tx) { struct pipe_screen *pscreen = pipe->screen; - struct pipe_resource staging_resource_template; pipe_resource_reference(&tx->base.resource, pt); @@ -66,23 +68,22 @@ util_staging_transfer_init(struct pipe_context *pipe, tx->base.usage = usage; tx->base.box = *box; - if (direct) - { + if (direct) { tx->staging_resource = pt; return tx; } - util_staging_resource_template(pt, box->width, box->height, box->depth, &staging_resource_template); - tx->staging_resource = pscreen->resource_create(pscreen, &staging_resource_template); - if (!tx->staging_resource) - { + util_staging_resource_template(pt, box->width, box->height, + box->depth, &staging_resource_template); + tx->staging_resource = pscreen->resource_create(pscreen, + &staging_resource_template); + if (!tx->staging_resource) { 
pipe_resource_reference(&tx->base.resource, NULL); FREE(tx); return NULL; } - if (usage & PIPE_TRANSFER_READ) - { + if (usage & PIPE_TRANSFER_READ) { /* XXX this looks wrong dst is always the same but looping over src z? */ int zi; struct pipe_box sbox; @@ -92,7 +93,7 @@ util_staging_transfer_init(struct pipe_context *pipe, sbox.width = box->width; sbox.height = box->height; sbox.depth = 1; - for(zi = 0; zi < box->depth; ++zi) { + for (zi = 0; zi < box->depth; ++zi) { sbox.z = sbox.z + zi; pipe->resource_copy_region(pipe, tx->staging_resource, 0, 0, 0, 0, tx->base.resource, level, &sbox); @@ -102,14 +103,15 @@ util_staging_transfer_init(struct pipe_context *pipe, return tx; } + void -util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx) +util_staging_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *ptx) { struct util_staging_transfer *tx = (struct util_staging_transfer *)ptx; - if (tx->staging_resource != tx->base.resource) - { - if(tx->base.usage & PIPE_TRANSFER_WRITE) { + if (tx->staging_resource != tx->base.resource) { + if (tx->base.usage & PIPE_TRANSFER_WRITE) { /* XXX this looks wrong src is always the same but looping over dst z? 
*/ int zi; struct pipe_box sbox; @@ -119,8 +121,10 @@ util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *p sbox.width = tx->base.box.width; sbox.height = tx->base.box.height; sbox.depth = 1; - for(zi = 0; zi < tx->base.box.depth; ++zi) - pipe->resource_copy_region(pipe, tx->base.resource, tx->base.level, tx->base.box.x, tx->base.box.y, tx->base.box.z + zi, + for (zi = 0; zi < tx->base.box.depth; ++zi) + pipe->resource_copy_region(pipe, tx->base.resource, tx->base.level, + tx->base.box.x, tx->base.box.y, + tx->base.box.z + zi, tx->staging_resource, 0, &sbox); } diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h index ddbb33443e4..6c468aad161 100644 --- a/src/gallium/auxiliary/util/u_staging.h +++ b/src/gallium/auxiliary/util/u_staging.h @@ -42,22 +42,26 @@ struct util_staging_transfer { struct pipe_transfer base; - /* if direct, same as base.resource, otherwise the temporary staging resource */ + /* if direct, same as base.resource, otherwise the temporary staging + * resource + */ struct pipe_resource *staging_resource; }; -/* user must be stride, slice_stride and offset */ -/* pt->usage == PIPE_USAGE_DYNAMIC || pt->usage == PIPE_USAGE_STAGING should be a good value to pass for direct */ -/* staging resource is currently created with PIPE_USAGE_STAGING */ +/* user must be stride, slice_stride and offset. 
+ * pt->usage == PIPE_USAGE_DYNAMIC || pt->usage == PIPE_USAGE_STAGING + * should be a good value to pass for direct staging resource is currently + * created with PIPE_USAGE_STAGING + */ struct util_staging_transfer * util_staging_transfer_init(struct pipe_context *pipe, - struct pipe_resource *pt, - unsigned level, - unsigned usage, - const struct pipe_box *box, - boolean direct, struct util_staging_transfer *tx); + struct pipe_resource *pt, + unsigned level, unsigned usage, + const struct pipe_box *box, + boolean direct, struct util_staging_transfer *tx); void -util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx); +util_staging_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *ptx); #endif diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 6eb6a2d52ef..f38dc8643b4 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1004,7 +1004,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr, nir_const_value *const_offset; /* UBO addresses are the first driver params: */ unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0); - int off = intr->const_index[0]; + int off = 0; /* First src is ubo index, which could either be an immed or not: */ src0 = get_src(ctx, &intr->src[0])[0]; @@ -1092,7 +1092,7 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr) nir_deref_array *darr = nir_deref_as_array(dvar->deref.child); struct ir3_array *arr = get_var(ctx, dvar->var); struct ir3_instruction *addr, **src; - unsigned wrmask = intr->const_index[0]; + unsigned wrmask = nir_intrinsic_write_mask(intr); compile_assert(ctx, dvar->deref.child && (dvar->deref.child->deref_type == nir_deref_type_array)); @@ -1145,8 +1145,8 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) const nir_intrinsic_info *info = 
&nir_intrinsic_infos[intr->intrinsic]; struct ir3_instruction **dst, **src; struct ir3_block *b = ctx->block; - int idx = intr->const_index[0]; nir_const_value *const_offset; + int idx; if (info->has_dest) { dst = get_dst(ctx, &intr->dest, intr->num_components); @@ -1156,6 +1156,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) switch (intr->intrinsic) { case nir_intrinsic_load_uniform: + idx = nir_intrinsic_base(intr); const_offset = nir_src_as_const_value(intr->src[0]); if (const_offset) { idx += const_offset->u[0]; @@ -1182,6 +1183,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) emit_intrinsic_load_ubo(ctx, intr, dst); break; case nir_intrinsic_load_input: + idx = nir_intrinsic_base(intr); const_offset = nir_src_as_const_value(intr->src[0]); if (const_offset) { idx += const_offset->u[0]; @@ -1208,6 +1210,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) emit_intrinsic_store_var(ctx, intr); break; case nir_intrinsic_store_output: + idx = nir_intrinsic_base(intr); const_offset = nir_src_as_const_value(intr->src[1]); compile_assert(ctx, const_offset != NULL); idx += const_offset->u[0]; @@ -1243,6 +1246,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) dst[0] = ctx->instance_id; break; case nir_intrinsic_load_user_clip_plane: + idx = nir_intrinsic_ucp_id(intr); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n); diff --git a/src/gallium/drivers/ilo/ilo_draw.c b/src/gallium/drivers/ilo/ilo_draw.c index 69f36ae5df6..6831d2c4eff 100644 --- a/src/gallium/drivers/ilo/ilo_draw.c +++ b/src/gallium/drivers/ilo/ilo_draw.c @@ -71,6 +71,7 @@ query_process_bo(const struct ilo_context *ilo, struct ilo_query *q) switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_TIME_ELAPSED: case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_PRIMITIVES_EMITTED: @@ -157,6 
+158,7 @@ ilo_init_draw_query(struct ilo_context *ilo, struct ilo_query *q) switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_TIME_ELAPSED: case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_PRIMITIVES_EMITTED: diff --git a/src/gallium/drivers/ilo/ilo_query.c b/src/gallium/drivers/ilo/ilo_query.c index 27d08128ab0..106bd42a335 100644 --- a/src/gallium/drivers/ilo/ilo_query.c +++ b/src/gallium/drivers/ilo/ilo_query.c @@ -47,7 +47,7 @@ static const struct { #define INFOX(prefix) { NULL, NULL, NULL, NULL, } [PIPE_QUERY_OCCLUSION_COUNTER] = INFO(draw), - [PIPE_QUERY_OCCLUSION_PREDICATE] = INFOX(draw), + [PIPE_QUERY_OCCLUSION_PREDICATE] = INFO(draw), [PIPE_QUERY_TIMESTAMP] = INFO(draw), [PIPE_QUERY_TIMESTAMP_DISJOINT] = INFOX(draw), [PIPE_QUERY_TIME_ELAPSED] = INFO(draw), @@ -75,6 +75,7 @@ ilo_create_query(struct pipe_context *pipe, unsigned query_type, unsigned index) switch (query_type) { case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_TIMESTAMP: case PIPE_QUERY_TIME_ELAPSED: case PIPE_QUERY_PRIMITIVES_GENERATED: @@ -163,6 +164,12 @@ query_serialize(const struct ilo_query *q, void *buf) dst[0] = q->result.u64; } break; + case PIPE_QUERY_OCCLUSION_PREDICATE: + { + uint64_t *dst = buf; + dst[0] = !!q->result.u64; + } + break; case PIPE_QUERY_PIPELINE_STATISTICS: { const struct pipe_query_data_pipeline_statistics *stats = diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c index 8bc04df4fab..9a47ca80505 100644 --- a/src/gallium/drivers/ilo/ilo_render.c +++ b/src/gallium/drivers/ilo/ilo_render.c @@ -202,6 +202,7 @@ ilo_render_get_query_len(const struct ilo_render *render, switch (query_type) { case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_TIMESTAMP: case PIPE_QUERY_TIME_ELAPSED: /* no reg */ @@ -268,6 +269,7 @@ ilo_render_emit_query(struct ilo_render *render, switch (q->type) { case 
PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: pipe_control_dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL | GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT; break; diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 268aab26c40..241c2ccafb7 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -32,6 +32,7 @@ #include "pipe/p_defines.h" #include "pipe/p_screen.h" +#include "util/u_debug_image.h" #include "util/u_string.h" #include "draw/draw_context.h" #include "lp_flush.h" diff --git a/src/gallium/drivers/nouveau/nv30/nv30_query.c b/src/gallium/drivers/nouveau/nv30/nv30_query.c index 3980be9579a..75a4b0446fe 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_query.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_query.c @@ -120,6 +120,7 @@ nv30_query_create(struct pipe_context *pipe, unsigned type, unsigned index) q->report = 1; break; case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: q->enable = NV30_3D_QUERY_ENABLE; q->report = 1; break; @@ -203,7 +204,6 @@ nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq, struct nv30_query *q = nv30_query(pq); volatile uint32_t *ntfy0 = nv30_ntfy(screen, q->qo[0]); volatile uint32_t *ntfy1 = nv30_ntfy(screen, q->qo[1]); - uint64_t *res64 = &result->u64; if (ntfy1) { while (ntfy1[3] & 0xff000000) { @@ -227,7 +227,10 @@ nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq, nv30_query_object_del(screen, &q->qo[1]); } - *res64 = q->result; + if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) + result->b = !!q->result; + else + result->u64 = q->result; return true; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c index cccd3b71672..727b509372d 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c @@ -156,6 +156,7 @@ nv50_hw_begin_query(struct nv50_context 
*nv50, struct nv50_query *q) switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: hq->nesting = nv50->screen->num_occlusion_queries_active++; if (hq->nesting) { nv50_hw_query_get(push, q, 0x10, 0x0100f002); @@ -213,6 +214,7 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q) switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: nv50_hw_query_get(push, q, 0, 0x0100f002); if (--nv50->screen->num_occlusion_queries_active == 0) { PUSH_SPACE(push, 2); @@ -304,6 +306,9 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q, case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ res64[0] = hq->data[1] - hq->data[5]; break; + case PIPE_QUERY_OCCLUSION_PREDICATE: + res8[0] = hq->data[1] != hq->data[5]; + break; case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ res64[0] = data64[0] - data64[2]; @@ -372,6 +377,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: hq->rotate = 32; break; case PIPE_QUERY_PRIMITIVES_GENERATED: diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 2cf08897a8d..d92e691fdb8 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -961,8 +961,8 @@ struct pipe_resource *r600_compute_global_buffer_create( templ->array_size); result->base.b.vtbl = &r600_global_buffer_vtbl; - result->base.b.b.screen = screen; result->base.b.b = *templ; + result->base.b.b.screen = screen; pipe_reference_init(&result->base.b.b.reference, 1); size_in_dw = (templ->width0+3) / 4; diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c index 3d0987624a6..474154e52ff 100644 --- 
a/src/gallium/drivers/radeon/radeon_llvm_emit.c +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c @@ -55,6 +55,14 @@ enum radeon_llvm_shader_type { RADEON_LLVM_SHADER_CS = 3, }; +void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value) +{ + char str[16]; + + snprintf(str, sizeof(str), "%i", value); + LLVMAddTargetDependentFunctionAttr(F, name, str); +} + /** * Set the shader type we want to compile * @@ -62,7 +70,6 @@ enum radeon_llvm_shader_type { */ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type) { - char Str[2]; enum radeon_llvm_shader_type llvm_type; switch (type) { @@ -84,9 +91,7 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type) assert(0); } - sprintf(Str, "%1d", llvm_type); - - LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str); + radeon_llvm_add_attribute(F, "ShaderType", llvm_type); } static void init_r600_target() diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.h b/src/gallium/drivers/radeon/radeon_llvm_emit.h index 45f05a9e0e1..84dbd2584a1 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.h +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.h @@ -34,6 +34,7 @@ struct pipe_debug_callback; struct radeon_shader_binary; +void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value); void radeon_llvm_shader_type(LLVMValueRef F, unsigned type); LLVMTargetRef radeon_llvm_get_r600_target(const char *triple); diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 825fbb181ba..4d27e86b414 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -124,7 +124,8 @@ static void *si_create_compute_state( code, header->num_bytes); si_compile_llvm(sctx->screen, &program->kernels[i].binary, &program->kernels[i].config, sctx->tm, - mod, &sctx->b.debug, TGSI_PROCESSOR_COMPUTE); + mod, &sctx->b.debug, TGSI_PROCESSOR_COMPUTE, + "Compute Shader"); si_shader_dump(sctx->screen, &program->kernels[i], 
&sctx->b.debug, TGSI_PROCESSOR_COMPUTE); si_shader_binary_upload(sctx->screen, &program->kernels[i]); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index d60c4515625..b5a4034cc12 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -182,7 +182,6 @@ void si_begin_new_cs(struct si_context *ctx) si_mark_atom_dirty(ctx, &ctx->db_render_state); si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom); si_mark_atom_dirty(ctx, &ctx->spi_map); - si_mark_atom_dirty(ctx, &ctx->spi_ps_input); si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom); si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom); si_all_descriptors_begin_new_cs(ctx); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 48947442757..3c963db5078 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -202,7 +202,6 @@ struct si_context { struct si_viewports viewports; struct si_stencil_ref stencil_ref; struct r600_atom spi_map; - struct r600_atom spi_ps_input; /* Precomputed states. */ struct si_pm4_state *init_config; @@ -222,7 +221,6 @@ struct si_context { struct si_vertex_element *vertex_elements; unsigned sprite_coord_enable; bool flatshade; - bool force_persample_interp; /* shader descriptors */ struct si_descriptors vertex_buffers; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index d9ed6b234e0..c1d3edc7143 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -833,14 +833,11 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location) } /* This shouldn't be used by explicit INTERP opcodes. 
*/ -static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx, - unsigned param) +static unsigned select_interp_param(struct si_shader_context *si_shader_ctx, + unsigned param) { - struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm; - unsigned sample_param = 0; - LLVMValueRef default_ij, sample_ij, force_sample; - - default_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, param); + if (!si_shader_ctx->shader->key.ps.force_persample_interp) + return param; /* If the shader doesn't use center/centroid, just return the parameter. * @@ -850,79 +847,52 @@ static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx, switch (param) { case SI_PARAM_PERSP_CENTROID: case SI_PARAM_PERSP_CENTER: - if (!si_shader_ctx->shader->selector->forces_persample_interp_for_persp) - return default_ij; - - sample_param = SI_PARAM_PERSP_SAMPLE; - break; + return SI_PARAM_PERSP_SAMPLE; case SI_PARAM_LINEAR_CENTROID: case SI_PARAM_LINEAR_CENTER: - if (!si_shader_ctx->shader->selector->forces_persample_interp_for_linear) - return default_ij; - - sample_param = SI_PARAM_LINEAR_SAMPLE; - break; + return SI_PARAM_LINEAR_SAMPLE; default: - return default_ij; + return param; } - - /* Otherwise, we have to select (i,j) based on a user data SGPR. */ - sample_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, sample_param); - - /* TODO: this can be done more efficiently by switching between - * 2 prologs. - */ - force_sample = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, - SI_PARAM_PS_STATE_BITS); - force_sample = LLVMBuildTrunc(gallivm->builder, force_sample, - LLVMInt1TypeInContext(gallivm->context), ""); - return LLVMBuildSelect(gallivm->builder, force_sample, - sample_ij, default_ij, ""); } -static void declare_input_fs( - struct radeon_llvm_context *radeon_bld, - unsigned input_index, - const struct tgsi_full_declaration *decl) +/** + * Interpolate a fragment shader input. 
+ * + * @param si_shader_ctx context + * @param input_index index of the input in hardware + * @param semantic_name TGSI_SEMANTIC_* + * @param semantic_index semantic index + * @param num_interp_inputs number of all interpolated inputs (= BCOLOR offset) + * @param colors_read_mask color components read (4 bits for each color, 8 bits in total) + * @param interp_param interpolation weights (i,j) + * @param prim_mask SI_PARAM_PRIM_MASK + * @param face SI_PARAM_FRONT_FACE + * @param result the return value (4 components) + */ +static void interp_fs_input(struct si_shader_context *si_shader_ctx, + unsigned input_index, + unsigned semantic_name, + unsigned semantic_index, + unsigned num_interp_inputs, + unsigned colors_read_mask, + LLVMValueRef interp_param, + LLVMValueRef prim_mask, + LLVMValueRef face, + LLVMValueRef result[4]) { - struct lp_build_context *base = &radeon_bld->soa.bld_base.base; - struct si_shader_context *si_shader_ctx = - si_shader_context(&radeon_bld->soa.bld_base); - struct si_shader *shader = si_shader_ctx->shader; - struct lp_build_context *uint = &radeon_bld->soa.bld_base.uint_bld; + struct lp_build_context *base = &si_shader_ctx->radeon_bld.soa.bld_base.base; + struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; struct gallivm_state *gallivm = base->gallivm; LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context); - LLVMValueRef main_fn = radeon_bld->main_fn; - - LLVMValueRef interp_param = NULL; - int interp_param_idx; const char * intr_name; - - /* This value is: - * [15:0] NewPrimMask (Bit mask for each quad. It is set it the - * quad begins a new primitive. 
Bit 0 always needs - * to be unset) - * [32:16] ParamOffset - * - */ - LLVMValueRef params = LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK); LLVMValueRef attr_number; unsigned chan; - shader->ps_input_param_offset[input_index] = shader->nparam++; - attr_number = lp_build_const_int32(gallivm, - shader->ps_input_param_offset[input_index]); - - shader->ps_input_interpolate[input_index] = decl->Interp.Interpolate; - interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate, - decl->Interp.Location); - if (interp_param_idx == -1) - return; - else if (interp_param_idx) - interp_param = get_interp_param(si_shader_ctx, interp_param_idx); + attr_number = lp_build_const_int32(gallivm, input_index); /* fs.constant returns the param from the middle vertex, so it's not * really useful for flat shading. It's meant to be used for custom @@ -936,24 +906,28 @@ static void declare_input_fs( */ intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant"; - if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR && + if (semantic_name == TGSI_SEMANTIC_COLOR && si_shader_ctx->shader->key.ps.color_two_side) { LLVMValueRef args[4]; - LLVMValueRef face, is_face_positive; - LLVMValueRef back_attr_number = - lp_build_const_int32(gallivm, - shader->ps_input_param_offset[input_index] + 1); + LLVMValueRef is_face_positive; + LLVMValueRef back_attr_number; - face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE); + /* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1", + * otherwise it's at offset "num_inputs". 
+ */ + unsigned back_attr_offset = num_interp_inputs; + if (semantic_index == 1 && colors_read_mask & 0xf) + back_attr_offset += 1; + + back_attr_number = lp_build_const_int32(gallivm, back_attr_offset); is_face_positive = LLVMBuildICmp(gallivm->builder, LLVMIntNE, face, uint->zero, ""); - args[2] = params; + args[2] = prim_mask; args[3] = interp_param; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); - unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan); LLVMValueRef front, back; args[0] = llvm_chan; @@ -967,48 +941,71 @@ static void declare_input_fs( input_type, args, args[3] ? 4 : 3, LLVMReadNoneAttribute | LLVMNoUnwindAttribute); - radeon_bld->inputs[soa_index] = - LLVMBuildSelect(gallivm->builder, + result[chan] = LLVMBuildSelect(gallivm->builder, is_face_positive, front, back, ""); } - - shader->nparam++; - } else if (decl->Semantic.Name == TGSI_SEMANTIC_FOG) { + } else if (semantic_name == TGSI_SEMANTIC_FOG) { LLVMValueRef args[4]; args[0] = uint->zero; args[1] = attr_number; - args[2] = params; + args[2] = prim_mask; args[3] = interp_param; - radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] = - lp_build_intrinsic(gallivm->builder, intr_name, + result[0] = lp_build_intrinsic(gallivm->builder, intr_name, input_type, args, args[3] ? 
4 : 3, LLVMReadNoneAttribute | LLVMNoUnwindAttribute); - radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] = - radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] = - lp_build_const_float(gallivm, 0.0f); - radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] = - lp_build_const_float(gallivm, 1.0f); + result[1] = + result[2] = lp_build_const_float(gallivm, 0.0f); + result[3] = lp_build_const_float(gallivm, 1.0f); } else { for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { LLVMValueRef args[4]; LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); - unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan); + args[0] = llvm_chan; args[1] = attr_number; - args[2] = params; + args[2] = prim_mask; args[3] = interp_param; - radeon_bld->inputs[soa_index] = - lp_build_intrinsic(gallivm->builder, intr_name, + result[chan] = lp_build_intrinsic(gallivm->builder, intr_name, input_type, args, args[3] ? 4 : 3, LLVMReadNoneAttribute | LLVMNoUnwindAttribute); } } } +static void declare_input_fs( + struct radeon_llvm_context *radeon_bld, + unsigned input_index, + const struct tgsi_full_declaration *decl) +{ + struct si_shader_context *si_shader_ctx = + si_shader_context(&radeon_bld->soa.bld_base); + struct si_shader *shader = si_shader_ctx->shader; + LLVMValueRef main_fn = radeon_bld->main_fn; + LLVMValueRef interp_param = NULL; + int interp_param_idx; + + interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate, + decl->Interp.Location); + if (interp_param_idx == -1) + return; + else if (interp_param_idx) { + interp_param_idx = select_interp_param(si_shader_ctx, + interp_param_idx); + interp_param = LLVMGetParam(main_fn, interp_param_idx); + } + + interp_fs_input(si_shader_ctx, input_index, decl->Semantic.Name, + decl->Semantic.Index, shader->selector->info.num_inputs, + shader->selector->info.colors_read, interp_param, + LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK), + LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE), + 
&radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)]); +} + static LLVMValueRef get_sample_id(struct radeon_llvm_context *radeon_bld) { return unpack_param(si_shader_context(&radeon_bld->soa.bld_base), @@ -1060,7 +1057,6 @@ static void declare_system_value( struct si_shader_context *si_shader_ctx = si_shader_context(&radeon_bld->soa.bld_base); struct lp_build_context *bld = &radeon_bld->soa.bld_base.base; - struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld; struct gallivm_state *gallivm = &radeon_bld->gallivm; LLVMValueRef value = 0; @@ -1136,12 +1132,10 @@ static void declare_system_value( } case TGSI_SEMANTIC_SAMPLEMASK: - /* Smoothing isn't MSAA in GL, but it's MSAA in hardware. - * Therefore, force gl_SampleMaskIn to 1 for GL. */ - if (si_shader_ctx->shader->key.ps.poly_line_smoothing) - value = uint_bld->one; - else - value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE); + /* This can only occur with the OpenGL Core profile, which + * doesn't support smoothing. + */ + value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE); break; case TGSI_SEMANTIC_TESSCOORD: @@ -1965,21 +1959,20 @@ handle_semantic: } } -/* This only writes the tessellation factor levels. 
*/ -static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) +static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, + LLVMValueRef rel_patch_id, + LLVMValueRef invocation_id, + LLVMValueRef tcs_out_current_patch_data_offset) { struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; struct si_shader *shader = si_shader_ctx->shader; unsigned tess_inner_index, tess_outer_index; - LLVMValueRef lds_base, lds_inner, lds_outer; - LLVMValueRef tf_base, rel_patch_id, byteoffset, buffer, rw_buffers; - LLVMValueRef out[6], vec0, vec1, invocation_id; + LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer; + LLVMValueRef out[6], vec0, vec1, rw_buffers, tf_base; unsigned stride, outer_comps, inner_comps, i; struct lp_build_if_state if_ctx; - invocation_id = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5); - /* Do this only for invocation 0, because the tess levels are per-patch, * not per-vertex. * @@ -2018,7 +2011,7 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) tess_inner_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSINNER, 0); tess_outer_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSOUTER, 0); - lds_base = get_tcs_out_current_patch_data_offset(si_shader_ctx); + lds_base = tcs_out_current_patch_data_offset; lds_inner = LLVMBuildAdd(gallivm->builder, lds_base, lp_build_const_int32(gallivm, tess_inner_index * 4), ""); @@ -2047,7 +2040,6 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) /* Get the offset. 
*/ tf_base = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_TESS_FACTOR_OFFSET); - rel_patch_id = get_rel_patch_id(si_shader_ctx); byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id, lp_build_const_int32(gallivm, 4 * stride), ""); @@ -2060,6 +2052,20 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) lp_build_endif(&if_ctx); } +/* This only writes the tessellation factor levels. */ +static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) +{ + struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); + LLVMValueRef invocation_id; + + invocation_id = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5); + + si_write_tess_factors(bld_base, + get_rel_patch_id(si_shader_ctx), + invocation_id, + get_tcs_out_current_patch_data_offset(si_shader_ctx)); +} + static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context * bld_base) { struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); @@ -3253,17 +3259,17 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action, LLVMValueRef interp_param; const struct tgsi_full_instruction *inst = emit_data->inst; const char *intr_name; - int input_index; + int input_index = inst->Src[0].Register.Index; int chan; int i; LLVMValueRef attr_number; LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context); LLVMValueRef params = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_PRIM_MASK); int interp_param_idx; + unsigned interp = shader->selector->info.input_interpolate[input_index]; unsigned location; assert(inst->Src[0].Register.File == TGSI_FILE_INPUT); - input_index = inst->Src[0].Register.Index; if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) @@ -3271,8 +3277,7 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action, else location = TGSI_INTERPOLATE_LOC_CENTROID; - interp_param_idx = 
lookup_interp_param_index(shader->ps_input_interpolate[input_index], - location); + interp_param_idx = lookup_interp_param_index(interp, location); if (interp_param_idx == -1) return; else if (interp_param_idx) @@ -3280,8 +3285,7 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action, else interp_param = NULL; - attr_number = lp_build_const_int32(gallivm, - shader->ps_input_param_offset[input_index]); + attr_number = lp_build_const_int32(gallivm, input_index); if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { @@ -3632,7 +3636,6 @@ static void create_function(struct si_shader_context *si_shader_ctx) case TGSI_PROCESSOR_FRAGMENT: params[SI_PARAM_ALPHA_REF] = f32; - params[SI_PARAM_PS_STATE_BITS] = i32; params[SI_PARAM_PRIM_MASK] = i32; last_sgpr = SI_PARAM_PRIM_MASK; params[SI_PARAM_PERSP_SAMPLE] = v2i32; @@ -3663,10 +3666,6 @@ static void create_function(struct si_shader_context *si_shader_ctx) radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params); radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type); - if (shader->dx10_clamp_mode) - LLVMAddTargetDependentFunctionAttr(si_shader_ctx->radeon_bld.main_fn, - "enable-no-nans-fp-math", "true"); - for (i = 0; i <= last_sgpr; ++i) { LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i); @@ -3884,7 +3883,7 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary, conf->spi_ps_input_ena = value; break; case R_0286D0_SPI_PS_INPUT_ADDR: - /* Not used yet, but will be in the future */ + conf->spi_ps_input_addr = value; break; case R_0286E8_SPI_TMPRING_SIZE: case R_00B860_COMPUTE_TMPRING_SIZE: @@ -3904,6 +3903,9 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary, } break; } + + if (!conf->spi_ps_input_addr) + conf->spi_ps_input_addr = conf->spi_ps_input_ena; } } @@ -4045,6 +4047,13 @@ static void si_shader_dump_stats(struct si_screen *sscreen, 
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave); if (r600_can_dump_shader(&sscreen->b, processor)) { + if (processor == TGSI_PROCESSOR_FRAGMENT) { + fprintf(stderr, "*** SHADER CONFIG ***\n" + "SPI_PS_INPUT_ADDR = 0x%04x\n" + "SPI_PS_INPUT_ENA = 0x%04x\n", + conf->spi_ps_input_addr, conf->spi_ps_input_ena); + } + fprintf(stderr, "*** SHADER STATS ***\n" "SGPRS: %d\n" "VGPRS: %d\n" @@ -4084,7 +4093,8 @@ int si_compile_llvm(struct si_screen *sscreen, LLVMTargetMachineRef tm, LLVMModuleRef mod, struct pipe_debug_callback *debug, - unsigned processor) + unsigned processor, + const char *name) { int r = 0; unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations); @@ -4092,8 +4102,11 @@ int si_compile_llvm(struct si_screen *sscreen, if (r600_can_dump_shader(&sscreen->b, processor)) { fprintf(stderr, "radeonsi: Compiling shader %d\n", count); - if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) + if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) { + fprintf(stderr, "%s LLVM IR:\n\n", name); LLVMDumpModule(mod); + fprintf(stderr, "\n"); + } } if (!si_replace_shader(count, binary)) { @@ -4106,6 +4119,20 @@ int si_compile_llvm(struct si_screen *sscreen, si_shader_binary_read_config(binary, conf, 0); + /* Enable 64-bit and 16-bit denormals, because there is no performance + * cost. + * + * If denormals are enabled, all floating-point output modifiers are + * ignored. + * + * Don't enable denormals for 32-bit floats, because: + * - Floating-point output modifiers would be ignored by the hw. + * - Some opcodes don't support denormals, such as v_mad_f32. We would + * have to stop using those. + * - SI & CI would be very slow. 
+ */ + conf->float_mode |= V_00B028_FP_64_DENORMS; + FREE(binary->config); FREE(binary->global_symbol_offsets); binary->config = NULL; @@ -4116,7 +4143,7 @@ int si_compile_llvm(struct si_screen *sscreen, /* Generate code for the hardware VS shader stage to go with a geometry shader */ static int si_generate_gs_copy_shader(struct si_screen *sscreen, struct si_shader_context *si_shader_ctx, - struct si_shader *gs, bool dump, + struct si_shader *gs, struct pipe_debug_callback *debug) { struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm; @@ -4186,14 +4213,14 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, radeon_llvm_finalize_module(&si_shader_ctx->radeon_bld); - if (dump) - fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n"); - r = si_compile_llvm(sscreen, &si_shader_ctx->shader->binary, &si_shader_ctx->shader->config, si_shader_ctx->tm, bld_base->base.gallivm->module, - debug, TGSI_PROCESSOR_GEOMETRY); + debug, TGSI_PROCESSOR_GEOMETRY, + "GS Copy Shader"); if (!r) { + if (r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY)) + fprintf(stderr, "GS Copy Shader:\n"); si_shader_dump(sscreen, si_shader_ctx->shader, debug, TGSI_PROCESSOR_GEOMETRY); r = si_shader_binary_upload(sscreen, si_shader_ctx->shader); @@ -4250,47 +4277,26 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f) } } -int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, - struct si_shader *shader, - struct pipe_debug_callback *debug) +static void si_init_shader_ctx(struct si_shader_context *ctx, + struct si_screen *sscreen, + struct si_shader *shader, + LLVMTargetMachineRef tm, + struct tgsi_shader_info *info) { - struct si_shader_selector *sel = shader->selector; - struct tgsi_token *tokens = sel->tokens; - struct si_shader_context si_shader_ctx; - struct lp_build_tgsi_context * bld_base; - struct tgsi_shader_info stipple_shader_info; - LLVMModuleRef mod; - int r = 0; - bool poly_stipple = sel->type == 
PIPE_SHADER_FRAGMENT && - shader->key.ps.poly_stipple; - bool dump = r600_can_dump_shader(&sscreen->b, sel->info.processor); - - if (poly_stipple) { - tokens = util_pstipple_create_fragment_shader(tokens, NULL, - SI_POLY_STIPPLE_SAMPLER, - TGSI_FILE_SYSTEM_VALUE); - tgsi_scan_shader(tokens, &stipple_shader_info); - } - - /* Dump TGSI code before doing TGSI->LLVM conversion in case the - * conversion fails. */ - if (dump && !(sscreen->b.debug_flags & DBG_NO_TGSI)) { - si_dump_shader_key(sel->type, &shader->key, stderr); - tgsi_dump(tokens, 0); - si_dump_streamout(&sel->so); - } - - assert(shader->nparam == 0); - - memset(&si_shader_ctx, 0, sizeof(si_shader_ctx)); - radeon_llvm_context_init(&si_shader_ctx.radeon_bld); - bld_base = &si_shader_ctx.radeon_bld.soa.bld_base; - - if (sel->type != PIPE_SHADER_COMPUTE) - shader->dx10_clamp_mode = true; + struct lp_build_tgsi_context *bld_base; + + memset(ctx, 0, sizeof(*ctx)); + radeon_llvm_context_init(&ctx->radeon_bld); + ctx->tm = tm; + ctx->screen = sscreen; + if (shader && shader->selector) + ctx->type = shader->selector->info.processor; + else + ctx->type = -1; + ctx->shader = shader; - shader->uses_instanceid = sel->info.uses_instanceid; - bld_base->info = poly_stipple ? 
&stipple_shader_info : &sel->info; + bld_base = &ctx->radeon_bld.soa.bld_base; + bld_base->info = info; bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action; @@ -4326,12 +4332,45 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32"; } +} + +int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, + struct si_shader *shader, + struct pipe_debug_callback *debug) +{ + struct si_shader_selector *sel = shader->selector; + struct tgsi_token *tokens = sel->tokens; + struct si_shader_context si_shader_ctx; + struct lp_build_tgsi_context * bld_base; + struct tgsi_shader_info stipple_shader_info; + LLVMModuleRef mod; + int r = 0; + bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT && + shader->key.ps.poly_stipple; + + if (poly_stipple) { + tokens = util_pstipple_create_fragment_shader(tokens, NULL, + SI_POLY_STIPPLE_SAMPLER, + TGSI_FILE_SYSTEM_VALUE); + tgsi_scan_shader(tokens, &stipple_shader_info); + } + /* Dump TGSI code before doing TGSI->LLVM conversion in case the + * conversion fails. */ + if (r600_can_dump_shader(&sscreen->b, sel->info.processor) && + !(sscreen->b.debug_flags & DBG_NO_TGSI)) { + si_dump_shader_key(sel->type, &shader->key, stderr); + tgsi_dump(tokens, 0); + si_dump_streamout(&sel->so); + } + + si_init_shader_ctx(&si_shader_ctx, sscreen, shader, tm, + poly_stipple ? 
&stipple_shader_info : &sel->info); + + shader->uses_instanceid = sel->info.uses_instanceid; + + bld_base = &si_shader_ctx.radeon_bld.soa.bld_base; si_shader_ctx.radeon_bld.load_system_value = declare_system_value; - si_shader_ctx.shader = shader; - si_shader_ctx.type = tgsi_get_processor_type(tokens); - si_shader_ctx.screen = sscreen; - si_shader_ctx.tm = tm; switch (si_shader_ctx.type) { case TGSI_PROCESSOR_VERTEX: @@ -4401,7 +4440,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld); r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm, - mod, debug, si_shader_ctx.type); + mod, debug, si_shader_ctx.type, "TGSI shader"); if (r) { fprintf(stderr, "LLVM failed to compile shader\n"); goto out; @@ -4422,7 +4461,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, shader->gs_copy_shader->selector = shader->selector; si_shader_ctx.shader = shader->gs_copy_shader; if ((r = si_generate_gs_copy_shader(sscreen, &si_shader_ctx, - shader, dump, debug))) { + shader, debug))) { free(shader->gs_copy_shader); shader->gs_copy_shader = NULL; goto out; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 98bdb890a45..c42c51e0455 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -88,7 +88,6 @@ struct radeon_shader_reloc; #define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */ #define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */ #define SI_SGPR_ALPHA_REF 8 /* PS only */ -#define SI_SGPR_PS_STATE_BITS 9 /* PS only */ #define SI_VS_NUM_USER_SGPR 13 /* API VS */ #define SI_ES_NUM_USER_SGPR 12 /* API VS */ @@ -97,7 +96,7 @@ struct radeon_shader_reloc; #define SI_TES_NUM_USER_SGPR 10 #define SI_GS_NUM_USER_SGPR 8 #define SI_GSCOPY_NUM_USER_SGPR 4 -#define SI_PS_NUM_USER_SGPR 10 +#define SI_PS_NUM_USER_SGPR 9 /* LLVM function parameter indices */ #define SI_PARAM_RW_BUFFERS 0 @@ 
-152,27 +151,23 @@ struct radeon_shader_reloc; /* PS only parameters */ #define SI_PARAM_ALPHA_REF 4 -/* Bits: - * 0: force_persample_interp - */ -#define SI_PARAM_PS_STATE_BITS 5 -#define SI_PARAM_PRIM_MASK 6 -#define SI_PARAM_PERSP_SAMPLE 7 -#define SI_PARAM_PERSP_CENTER 8 -#define SI_PARAM_PERSP_CENTROID 9 -#define SI_PARAM_PERSP_PULL_MODEL 10 -#define SI_PARAM_LINEAR_SAMPLE 11 -#define SI_PARAM_LINEAR_CENTER 12 -#define SI_PARAM_LINEAR_CENTROID 13 -#define SI_PARAM_LINE_STIPPLE_TEX 14 -#define SI_PARAM_POS_X_FLOAT 15 -#define SI_PARAM_POS_Y_FLOAT 16 -#define SI_PARAM_POS_Z_FLOAT 17 -#define SI_PARAM_POS_W_FLOAT 18 -#define SI_PARAM_FRONT_FACE 19 -#define SI_PARAM_ANCILLARY 20 -#define SI_PARAM_SAMPLE_COVERAGE 21 -#define SI_PARAM_POS_FIXED_PT 22 +#define SI_PARAM_PRIM_MASK 5 +#define SI_PARAM_PERSP_SAMPLE 6 +#define SI_PARAM_PERSP_CENTER 7 +#define SI_PARAM_PERSP_CENTROID 8 +#define SI_PARAM_PERSP_PULL_MODEL 9 +#define SI_PARAM_LINEAR_SAMPLE 10 +#define SI_PARAM_LINEAR_CENTER 11 +#define SI_PARAM_LINEAR_CENTROID 12 +#define SI_PARAM_LINE_STIPPLE_TEX 13 +#define SI_PARAM_POS_X_FLOAT 14 +#define SI_PARAM_POS_Y_FLOAT 15 +#define SI_PARAM_POS_Z_FLOAT 16 +#define SI_PARAM_POS_W_FLOAT 17 +#define SI_PARAM_FRONT_FACE 18 +#define SI_PARAM_ANCILLARY 19 +#define SI_PARAM_SAMPLE_COVERAGE 20 +#define SI_PARAM_POS_FIXED_PT 21 #define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1) @@ -193,14 +188,6 @@ struct si_shader_selector { /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */ unsigned type; - /* Whether the shader has to use a conditional assignment to - * choose between weights when emulating - * pipe_rasterizer_state::force_persample_interp. - * If false, "si_emit_spi_ps_input" will take care of it instead. - */ - bool forces_persample_interp_for_persp; - bool forces_persample_interp_for_linear; - /* GS parameters. 
*/ unsigned esgs_itemsize; unsigned gs_input_verts_per_prim; @@ -245,6 +232,7 @@ union si_shader_key { unsigned poly_stipple:1; unsigned poly_line_smoothing:1; unsigned clamp_color:1; + unsigned force_persample_interp:1; } ps; struct { unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS]; @@ -272,6 +260,7 @@ struct si_shader_config { unsigned num_vgprs; unsigned lds_size; unsigned spi_ps_input_ena; + unsigned spi_ps_input_addr; unsigned float_mode; unsigned scratch_bytes_per_wave; unsigned rsrc1; @@ -290,14 +279,10 @@ struct si_shader { struct radeon_shader_binary binary; struct si_shader_config config; - unsigned nparam; unsigned vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS]; - unsigned ps_input_param_offset[PIPE_MAX_SHADER_INPUTS]; - unsigned ps_input_interpolate[PIPE_MAX_SHADER_INPUTS]; bool uses_instanceid; unsigned nr_pos_exports; unsigned nr_param_exports; - bool dx10_clamp_mode; /* convert NaNs to 0 */ }; static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx) @@ -343,7 +328,8 @@ int si_compile_llvm(struct si_screen *sscreen, LLVMTargetMachineRef tm, LLVMModuleRef mod, struct pipe_debug_callback *debug, - unsigned processor); + unsigned processor, + const char *name); void si_shader_destroy(struct si_shader *shader); unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader); diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 507f45938ce..e9a017534d1 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -133,7 +133,6 @@ union si_state_atoms { struct r600_atom *viewports; struct r600_atom *stencil_ref; struct r600_atom *spi_map; - struct r600_atom *spi_ps_input; } s; struct r600_atom *array[0]; }; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index bbef429edc5..77a4e47c809 
100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -124,7 +124,8 @@ static void si_shader_ls(struct si_shader *shader) shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B528_SGPRS((num_sgprs - 1) / 8) | S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) | - S_00B528_DX10_CLAMP(shader->dx10_clamp_mode); + S_00B528_DX10_CLAMP(1) | + S_00B528_FLOAT_MODE(shader->config.float_mode); shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) | S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); } @@ -157,7 +158,8 @@ static void si_shader_hs(struct si_shader *shader) si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS, S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B428_SGPRS((num_sgprs - 1) / 8) | - S_00B428_DX10_CLAMP(shader->dx10_clamp_mode)); + S_00B428_DX10_CLAMP(1) | + S_00B428_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, S_00B42C_USER_SGPR(num_user_sgprs) | S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); @@ -203,7 +205,8 @@ static void si_shader_es(struct si_shader *shader) S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B328_SGPRS((num_sgprs - 1) / 8) | S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) | - S_00B328_DX10_CLAMP(shader->dx10_clamp_mode)); + S_00B328_DX10_CLAMP(1) | + S_00B328_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES, S_00B32C_USER_SGPR(num_user_sgprs) | S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); @@ -292,7 +295,8 @@ static void si_shader_gs(struct si_shader *shader) si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B228_SGPRS((num_sgprs - 1) / 8) | - S_00B228_DX10_CLAMP(shader->dx10_clamp_mode)); + S_00B228_DX10_CLAMP(1) | + S_00B228_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, 
S_00B22C_USER_SGPR(num_user_sgprs) | S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); @@ -381,7 +385,8 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B128_SGPRS((num_sgprs - 1) / 8) | S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) | - S_00B128_DX10_CLAMP(shader->dx10_clamp_mode)); + S_00B128_DX10_CLAMP(1) | + S_00B128_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, S_00B12C_USER_SGPR(num_user_sgprs) | S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) | @@ -404,6 +409,18 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) si_set_tesseval_regs(shader, pm4); } +static unsigned si_get_ps_num_interp(struct si_shader *ps) +{ + struct tgsi_shader_info *info = &ps->selector->info; + unsigned num_colors = !!(info->colors_read & 0x0f) + + !!(info->colors_read & 0xf0); + unsigned num_interp = ps->selector->info.num_inputs + + (ps->key.ps.color_two_side ? 
num_colors : 0); + + assert(num_interp <= 32); + return MIN2(num_interp, 32); +} + static unsigned si_get_spi_shader_col_format(struct si_shader *shader) { unsigned value = shader->key.ps.spi_shader_col_format; @@ -460,6 +477,17 @@ static void si_shader_ps(struct si_shader *shader) unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); uint64_t va; bool has_centroid; + unsigned input_ena = shader->config.spi_ps_input_ena; + + /* we need to enable at least one of them, otherwise we hang the GPU */ + assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) || + G_0286CC_PERSP_CENTER_ENA(input_ena) || + G_0286CC_PERSP_CENTROID_ENA(input_ena) || + G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) || + G_0286CC_LINEAR_SAMPLE_ENA(input_ena) || + G_0286CC_LINEAR_CENTER_ENA(input_ena) || + G_0286CC_LINEAR_CENTROID_ENA(input_ena) || + G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena)); pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state); @@ -503,11 +531,15 @@ static void si_shader_ps(struct si_shader *shader) shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)) spi_shader_col_format = V_028714_SPI_SHADER_32_R; + si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena); + si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, + shader->config.spi_ps_input_addr); + /* Set interpolation controls. */ has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) || G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena); - spi_ps_in_control = S_0286D8_NUM_INTERP(shader->nparam) | + spi_ps_in_control = S_0286D8_NUM_INTERP(si_get_ps_num_interp(shader)) | S_0286D8_BC_OPTIMIZE_DISABLE(has_centroid); /* Set registers. 
*/ @@ -540,7 +572,8 @@ static void si_shader_ps(struct si_shader *shader) si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS, S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B028_SGPRS((num_sgprs - 1) / 8) | - S_00B028_DX10_CLAMP(shader->dx10_clamp_mode)); + S_00B028_DX10_CLAMP(1) | + S_00B028_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS, S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) | S_00B02C_USER_SGPR(num_user_sgprs) | @@ -681,7 +714,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY; bool is_line = !is_poly && sctx->current_rast_prim != PIPE_PRIM_POINTS; - key->ps.color_two_side = rs->two_side; + key->ps.color_two_side = rs->two_side && sel->info.colors_read; if (sctx->queued.named.blend) { key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one && @@ -694,6 +727,15 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, (is_line && rs->line_smooth)) && sctx->framebuffer.nr_samples <= 1; key->ps.clamp_color = rs->clamp_fragment_color; + + key->ps.force_persample_interp = rs->force_persample_interp && + rs->multisample_enable && + sctx->framebuffer.nr_samples > 1 && + sctx->ps_iter_samples > 1 && + (sel->info.uses_persp_center || + sel->info.uses_persp_centroid || + sel->info.uses_linear_center || + sel->info.uses_linear_centroid); } key->ps.alpha_func = si_get_alpha_test_func(sctx); @@ -796,7 +838,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sel->type = util_pipe_shader_from_tgsi_processor(sel->info.processor); p_atomic_inc(&sscreen->b.num_shaders_created); - /* First set which opcode uses which (i,j) pair. */ + /* Set which opcode uses which (i,j) pair. 
*/ if (sel->info.uses_persp_opcode_interp_centroid) sel->info.uses_persp_centroid = true; @@ -811,19 +853,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sel->info.uses_linear_opcode_interp_sample) sel->info.uses_linear_center = true; - /* Determine if the shader has to use a conditional assignment when - * emulating force_persample_interp. - */ - sel->forces_persample_interp_for_persp = - sel->info.uses_persp_center + - sel->info.uses_persp_centroid + - sel->info.uses_persp_sample >= 2; - - sel->forces_persample_interp_for_linear = - sel->info.uses_linear_center + - sel->info.uses_linear_centroid + - sel->info.uses_linear_sample >= 2; - switch (sel->type) { case PIPE_SHADER_GEOMETRY: sel->gs_output_prim = @@ -893,7 +922,8 @@ static void *si_create_shader_selector(struct pipe_context *ctx, } /* Pre-compilation. */ - if (sscreen->b.debug_flags & DBG_PRECOMPILE) { + if (sel->type == PIPE_SHADER_GEOMETRY || + sscreen->b.debug_flags & DBG_PRECOMPILE) { struct si_shader_ctx_state state = {sel}; union si_shader_key key; @@ -1030,6 +1060,41 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) si_mark_atom_dirty(sctx, &sctx->cb_render_state); } +static void si_delete_shader(struct si_context *sctx, struct si_shader *shader) +{ + if (shader->pm4) { + switch (shader->selector->type) { + case PIPE_SHADER_VERTEX: + if (shader->key.vs.as_ls) + si_pm4_delete_state(sctx, ls, shader->pm4); + else if (shader->key.vs.as_es) + si_pm4_delete_state(sctx, es, shader->pm4); + else + si_pm4_delete_state(sctx, vs, shader->pm4); + break; + case PIPE_SHADER_TESS_CTRL: + si_pm4_delete_state(sctx, hs, shader->pm4); + break; + case PIPE_SHADER_TESS_EVAL: + if (shader->key.tes.as_es) + si_pm4_delete_state(sctx, es, shader->pm4); + else + si_pm4_delete_state(sctx, vs, shader->pm4); + break; + case PIPE_SHADER_GEOMETRY: + si_pm4_delete_state(sctx, gs, shader->pm4); + si_pm4_delete_state(sctx, vs, shader->gs_copy_shader->pm4); + break; + case 
PIPE_SHADER_FRAGMENT: + si_pm4_delete_state(sctx, ps, shader->pm4); + break; + } + } + + si_shader_destroy(shader); + free(shader); +} + static void si_delete_shader_selector(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; @@ -1050,35 +1115,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state) while (p) { c = p->next_variant; - switch (sel->type) { - case PIPE_SHADER_VERTEX: - if (p->key.vs.as_ls) - si_pm4_delete_state(sctx, ls, p->pm4); - else if (p->key.vs.as_es) - si_pm4_delete_state(sctx, es, p->pm4); - else - si_pm4_delete_state(sctx, vs, p->pm4); - break; - case PIPE_SHADER_TESS_CTRL: - si_pm4_delete_state(sctx, hs, p->pm4); - break; - case PIPE_SHADER_TESS_EVAL: - if (p->key.tes.as_es) - si_pm4_delete_state(sctx, es, p->pm4); - else - si_pm4_delete_state(sctx, vs, p->pm4); - break; - case PIPE_SHADER_GEOMETRY: - si_pm4_delete_state(sctx, gs, p->pm4); - si_pm4_delete_state(sctx, vs, p->gs_copy_shader->pm4); - break; - case PIPE_SHADER_FRAGMENT: - si_pm4_delete_state(sctx, ps, p->pm4); - break; - } - - si_shader_destroy(p); - free(p); + si_delete_shader(sctx, p); p = c; } @@ -1087,132 +1124,86 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state) free(sel); } +static unsigned si_get_ps_input_cntl(struct si_context *sctx, + struct si_shader *vs, unsigned name, + unsigned index, unsigned interpolate) +{ + struct tgsi_shader_info *vsinfo = &vs->selector->info; + unsigned j, ps_input_cntl = 0; + + if (interpolate == TGSI_INTERPOLATE_CONSTANT || + (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade)) + ps_input_cntl |= S_028644_FLAT_SHADE(1); + + if (name == TGSI_SEMANTIC_PCOORD || + (name == TGSI_SEMANTIC_TEXCOORD && + sctx->sprite_coord_enable & (1 << index))) { + ps_input_cntl |= S_028644_PT_SPRITE_TEX(1); + } + + for (j = 0; j < vsinfo->num_outputs; j++) { + if (name == vsinfo->output_semantic_name[j] && + index == vsinfo->output_semantic_index[j]) { + 
ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[j]); + break; + } + } + + if (name == TGSI_SEMANTIC_PRIMID) + /* PrimID is written after the last output. */ + ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]); + else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(ps_input_cntl)) { + /* No corresponding output found, load defaults into input. + * Don't set any other bits. + * (FLAT_SHADE=1 completely changes behavior) */ + ps_input_cntl = S_028644_OFFSET(0x20); + } + return ps_input_cntl; +} + static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; struct si_shader *ps = sctx->ps_shader.current; struct si_shader *vs = si_get_vs_state(sctx); - struct tgsi_shader_info *psinfo; - struct tgsi_shader_info *vsinfo = &vs->selector->info; - unsigned i, j, tmp, num_written = 0; + struct tgsi_shader_info *psinfo = ps ? &ps->selector->info : NULL; + unsigned i, num_interp, num_written = 0, bcol_interp[2]; - if (!ps || !ps->nparam) + if (!ps || !ps->selector->info.num_inputs) return; - psinfo = &ps->selector->info; - - radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps->nparam); + num_interp = si_get_ps_num_interp(ps); + assert(num_interp > 0); + radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, num_interp); for (i = 0; i < psinfo->num_inputs; i++) { unsigned name = psinfo->input_semantic_name[i]; unsigned index = psinfo->input_semantic_index[i]; unsigned interpolate = psinfo->input_interpolate[i]; - unsigned param_offset = ps->ps_input_param_offset[i]; -bcolor: - tmp = 0; - - if (interpolate == TGSI_INTERPOLATE_CONSTANT || - (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade)) - tmp |= S_028644_FLAT_SHADE(1); - - if (name == TGSI_SEMANTIC_PCOORD || - (name == TGSI_SEMANTIC_TEXCOORD && - sctx->sprite_coord_enable & (1 << index))) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - for (j = 0; j < vsinfo->num_outputs; j++) { - if (name 
== vsinfo->output_semantic_name[j] && - index == vsinfo->output_semantic_index[j]) { - tmp |= S_028644_OFFSET(vs->vs_output_param_offset[j]); - break; - } - } - - if (name == TGSI_SEMANTIC_PRIMID) - /* PrimID is written after the last output. */ - tmp |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]); - else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(tmp)) { - /* No corresponding output found, load defaults into input. - * Don't set any other bits. - * (FLAT_SHADE=1 completely changes behavior) */ - tmp = S_028644_OFFSET(0x20); - } - - assert(param_offset == num_written); - radeon_emit(cs, tmp); + radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, name, index, + interpolate)); num_written++; - if (name == TGSI_SEMANTIC_COLOR && - ps->key.ps.color_two_side) { - name = TGSI_SEMANTIC_BCOLOR; - param_offset++; - goto bcolor; + if (name == TGSI_SEMANTIC_COLOR) { + assert(index < ARRAY_SIZE(bcol_interp)); + bcol_interp[index] = interpolate; } } - assert(ps->nparam == num_written); -} -static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom) -{ - struct radeon_winsys_cs *cs = sctx->b.gfx.cs; - struct si_shader *ps = sctx->ps_shader.current; - unsigned input_ena; - - if (!ps) - return; + if (ps->key.ps.color_two_side) { + unsigned bcol = TGSI_SEMANTIC_BCOLOR; - input_ena = ps->config.spi_ps_input_ena; + for (i = 0; i < 2; i++) { + if (!(psinfo->colors_read & (0xf << (i * 4)))) + continue; - /* we need to enable at least one of them, otherwise we hang the GPU */ - assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) || - G_0286CC_PERSP_CENTER_ENA(input_ena) || - G_0286CC_PERSP_CENTROID_ENA(input_ena) || - G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) || - G_0286CC_LINEAR_SAMPLE_ENA(input_ena) || - G_0286CC_LINEAR_CENTER_ENA(input_ena) || - G_0286CC_LINEAR_CENTROID_ENA(input_ena) || - G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena)); - - if (sctx->force_persample_interp) { - unsigned num_persp = G_0286CC_PERSP_SAMPLE_ENA(input_ena) + - 
G_0286CC_PERSP_CENTER_ENA(input_ena) + - G_0286CC_PERSP_CENTROID_ENA(input_ena); - unsigned num_linear = G_0286CC_LINEAR_SAMPLE_ENA(input_ena) + - G_0286CC_LINEAR_CENTER_ENA(input_ena) + - G_0286CC_LINEAR_CENTROID_ENA(input_ena); - - /* If only one set of (i,j) coordinates is used, we can disable - * CENTER/CENTROID, enable SAMPLE and it will load SAMPLE coordinates - * where CENTER/CENTROID are expected, effectively forcing per-sample - * interpolation. - */ - if (num_persp == 1) { - input_ena &= C_0286CC_PERSP_CENTER_ENA; - input_ena &= C_0286CC_PERSP_CENTROID_ENA; - input_ena |= G_0286CC_PERSP_SAMPLE_ENA(1); + radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, bcol, + i, bcol_interp[i])); + num_written++; } - if (num_linear == 1) { - input_ena &= C_0286CC_LINEAR_CENTER_ENA; - input_ena &= C_0286CC_LINEAR_CENTROID_ENA; - input_ena |= G_0286CC_LINEAR_SAMPLE_ENA(1); - } - - /* If at least 2 sets of coordinates are used, we can't use this - * trick and have to select SAMPLE using a conditional assignment - * in the shader with "force_persample_interp" being a shader constant. 
- */ } - - radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2); - radeon_emit(cs, input_ena); - radeon_emit(cs, input_ena); - - if (ps->selector->forces_persample_interp_for_persp || - ps->selector->forces_persample_interp_for_linear) - radeon_set_sh_reg(cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 + - SI_SGPR_PS_STATE_BITS * 4, - sctx->force_persample_interp); + assert(num_interp == num_written); } /** @@ -1746,12 +1737,6 @@ bool si_update_shaders(struct si_context *sctx) si_mark_atom_dirty(sctx, &sctx->spi_map); } - if (si_pm4_state_changed(sctx, ps) || - sctx->force_persample_interp != rs->force_persample_interp) { - sctx->force_persample_interp = rs->force_persample_interp; - si_mark_atom_dirty(sctx, &sctx->spi_ps_input); - } - if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps)) si_mark_atom_dirty(sctx, &sctx->cb_render_state); @@ -1784,7 +1769,6 @@ bool si_update_shaders(struct si_context *sctx) void si_init_shader_functions(struct si_context *sctx) { si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map); - si_init_atom(sctx, &sctx->spi_ps_input, &sctx->atoms.s.spi_ps_input, si_emit_spi_ps_input); sctx->b.b.create_vs_state = si_create_shader_selector; sctx->b.b.create_tcs_state = si_create_shader_selector; diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index 9e1e158219f..892084707d2 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -2845,6 +2845,9 @@ #define S_00B028_FLOAT_MODE(x) (((x) & 0xFF) << 12) #define G_00B028_FLOAT_MODE(x) (((x) >> 12) & 0xFF) #define C_00B028_FLOAT_MODE 0xFFF00FFF +#define V_00B028_FP_32_DENORMS 0x30 +#define V_00B028_FP_64_DENORMS 0xc0 +#define V_00B028_FP_ALL_DENORMS 0xf0 #define S_00B028_PRIV(x) (((x) & 0x1) << 20) #define G_00B028_PRIV(x) (((x) >> 20) & 0x1) #define C_00B028_PRIV 0xFFEFFFFF diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 
188347bb4ca..5a29e26517d 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -38,6 +38,7 @@ #include "sp_state.h" #include "sp_tile_cache.h" #include "sp_tex_tile_cache.h" +#include "util/u_debug_image.h" #include "util/u_memory.h" #include "util/u_string.h" diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c index d593c781680..8e0af12d294 100644 --- a/src/gallium/drivers/svga/svga_pipe_flush.c +++ b/src/gallium/drivers/svga/svga_pipe_flush.c @@ -24,6 +24,7 @@ **********************************************************/ #include "pipe/p_defines.h" +#include "util/u_debug_image.h" #include "util/u_string.h" #include "svga_screen.h" #include "svga_surface.h" diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 4d03fe1ee0b..2ce2b3aef75 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -120,18 +120,13 @@ trace_context_draw_vbo(struct pipe_context *_pipe, trace_dump_trace_flush(); if (info->indirect) { - struct pipe_draw_info *_info = NULL; + struct pipe_draw_info _info; - _info = MALLOC(sizeof(*_info)); - if (!_info) - return; - - memcpy(_info, info, sizeof(*_info)); - _info->indirect = trace_resource_unwrap(tr_ctx, _info->indirect); - _info->indirect_params = trace_resource_unwrap(tr_ctx, - _info->indirect_params); - pipe->draw_vbo(pipe, _info); - FREE(_info); + memcpy(&_info, info, sizeof(_info)); + _info.indirect = trace_resource_unwrap(tr_ctx, _info.indirect); + _info.indirect_params = trace_resource_unwrap(tr_ctx, + _info.indirect_params); + pipe->draw_vbo(pipe, &_info); } else { pipe->draw_vbo(pipe, info); } @@ -1285,6 +1280,33 @@ trace_context_clear_depth_stencil(struct pipe_context *_pipe, } static inline void +trace_context_clear_texture(struct pipe_context *_pipe, + struct pipe_resource *res, + unsigned level, + const struct pipe_box *box, + const void *data) +{ + 
struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + res = trace_resource_unwrap(tr_ctx, res); + + trace_dump_call_begin("pipe_context", "clear_texture"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, res); + trace_dump_arg(uint, level); + trace_dump_arg_begin("box"); + trace_dump_box(box); + trace_dump_arg_end(); + trace_dump_arg(ptr, data); + + pipe->clear_texture(pipe, res, level, box, data); + + trace_dump_call_end(); +} + +static inline void trace_context_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, unsigned flags) @@ -1709,6 +1731,7 @@ trace_context_create(struct trace_screen *tr_scr, TR_CTX_INIT(clear); TR_CTX_INIT(clear_render_target); TR_CTX_INIT(clear_depth_stencil); + TR_CTX_INIT(clear_texture); TR_CTX_INIT(flush); TR_CTX_INIT(generate_mipmap); TR_CTX_INIT(texture_barrier); diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 800f16cd250..b01f6ea3dcb 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -349,6 +349,12 @@ enum pipe_flush_flags #define PIPE_CONTEXT_DEBUG (1 << 1) /** + * Whether out-of-bounds shader loads must return zero and out-of-bounds + * shader stores must be dropped. + */ +#define PIPE_CONTEXT_ROBUST_BUFFER_ACCESS (1 << 2) + +/** * Flags for pipe_context::memory_barrier. 
*/ #define PIPE_BARRIER_MAPPED_BUFFER (1 << 0) diff --git a/src/gallium/targets/graw-null/graw_util.c b/src/gallium/targets/graw-null/graw_util.c index 07693e85f6a..03b45d99e9d 100644 --- a/src/gallium/targets/graw-null/graw_util.c +++ b/src/gallium/targets/graw-null/graw_util.c @@ -5,6 +5,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_text.h" #include "util/u_debug.h" +#include "util/u_debug_image.h" #include "util/u_memory.h" #include "state_tracker/graw.h" diff --git a/src/gallium/tests/graw/graw_util.h b/src/gallium/tests/graw/graw_util.h index f09c1eadc9c..3c7dbd061cc 100644 --- a/src/gallium/tests/graw/graw_util.h +++ b/src/gallium/tests/graw/graw_util.h @@ -9,6 +9,7 @@ #include "util/u_box.h" #include "util/u_debug.h" +#include "util/u_debug_image.h" #include "util/u_draw_quad.h" #include "util/u_format.h" #include "util/u_inlines.h" diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c index 4c5a9200a52..ddee2942af9 100644 --- a/src/gallium/tests/trivial/quad-tex.c +++ b/src/gallium/tests/trivial/quad-tex.c @@ -50,7 +50,7 @@ /* u_sampler_view_default_template */ #include "util/u_sampler.h" /* debug_dump_surface_bmp */ -#include "util/u_debug.h" +#include "util/u_debug_image.h" /* util_draw_vertex_buffer helper */ #include "util/u_draw_quad.h" /* FREE & CALLOC_STRUCT */ diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c index c71a63f44e5..914f5e75fa9 100644 --- a/src/gallium/tests/trivial/tri.c +++ b/src/gallium/tests/trivial/tri.c @@ -48,7 +48,7 @@ #include "cso_cache/cso_context.h" /* debug_dump_surface_bmp */ -#include "util/u_debug.h" +#include "util/u_debug_image.h" /* util_draw_vertex_buffer helper */ #include "util/u_draw_quad.h" /* FREE & CALLOC_STRUCT */ diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 35dc7e69dcf..49c310cfdf7 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ 
b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -405,6 +405,12 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL, &ws->info.num_tile_pipes); + /* The kernel returns 12 for some cards for an unknown reason. + * I thought this was supposed to be a power of two. + */ + if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12) + ws->info.num_tile_pipes = 8; + if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL, &ws->info.r600_gb_backend_map)) ws->info.r600_gb_backend_map_valid = TRUE; |