diff options
Diffstat (limited to 'src/gallium/drivers')
29 files changed, 512 insertions, 314 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 3b687bb8689..1433a4925fa 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -134,6 +134,8 @@ extern void cell_vertex_shader_queue_flush(struct draw_context *draw); +/* XXX find a better home for this */ +extern void cell_update_vertex_fetch(struct draw_context *draw); #endif /* CELL_CONTEXT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index cbd387f0142..c839fb4d12d 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -38,6 +38,7 @@ #include "cell_context.h" #include "cell_draw_arrays.h" #include "cell_state.h" +#include "cell_flush.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h index eda351b1cbc..7f940ae76b6 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.h +++ b/src/gallium/drivers/cell/ppu/cell_flush.h @@ -35,4 +35,8 @@ cell_flush(struct pipe_context *pipe, unsigned flags); extern void cell_flush_int(struct pipe_context *pipe, unsigned flags); +extern void +cell_flush_buffer_range(struct cell_context *cell, void *ptr, + unsigned size); + #endif diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index f7ef72e5a2c..f5c27852c14 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -55,7 +55,6 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) struct cell_command_vs *const vs = &cell_global.command[0].vs; uint64_t *batch; struct cell_array_info *array_info; - struct cell_shader_info *shader_info; unsigned i, j; struct cell_attribute_fetch_code *cf; @@ -123,12 +122,12 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) for (j = 0; j < n; j++) { vs->elts[j] = draw->vs.queue[i + j].elt; - vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].dest; + vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; } for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) { vs->elts[j] = vs->elts[0]; - vs->vOut[j] = vs->vOut[0]; + vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; } vs->num_elts = n; @@ -137,5 +136,6 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) cell_flush_int(& cell->pipe, PIPE_FLUSH_WAIT); } + draw->vs.post_nr = draw->vs.queue_nr; draw->vs.queue_nr = 0; } diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c index 698a5790bb0..a1701d80d18 100644 --- a/src/gallium/drivers/cell/spu/spu_dcache.c +++ b/src/gallium/drivers/cell/spu/spu_dcache.c @@ -33,7 +33,7 @@ #define CACHE_NAME data #define CACHED_TYPE qword #define CACHE_TYPE CACHE_TYPE_RO -#define CACHE_SET_TAGID(set) TAG_VERTEX_BUFFER +#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0) #define CACHE_LOG2NNWAY 2 #define CACHE_LOG2NSETS 6 #include <cache-api.h> @@ -49,43 +49,57 @@ /** * Fetch between arbitrary number of bytes from an unaligned address + * + * \param dst Destination data buffer + * \param ea Main memory effective address of source data + * \param size Number of bytes to read + * + * \warning + * As is hinted by the type of the \c dst pointer, this function writes + * multiples of 16-bytes. */ void spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size) { const int shift = ea & 0x0f; - const unsigned aligned_start_ea = ea & ~0x0f; - const unsigned aligned_end_ea = ROUNDUP16(ea + size); - const unsigned num_entries = (aligned_end_ea - aligned_start_ea) / 16; + const unsigned read_size = ROUNDUP16(size + shift); + const unsigned last_read = ROUNDUP16(ea + size); + const qword *const last_write = dst + (ROUNDUP16(size) / 16); unsigned i; if (shift == 0) { /* Data is already aligned. Fetch directly into the destination buffer. */ - for (i = 0; i < num_entries; i++) { - dst[i] = cache_rd(data, ea + (i * 16)); + for (i = 0; i < size; i += 16) { + *(dst++) = cache_rd(data, ea + i); } } else { - qword tmp[2] ALIGN16_ATTRIB; - + qword hi; - tmp[0] = cache_rd(data, (ea & ~0x0f)); - for (i = 0; i < (num_entries & ~1); i++) { - const unsigned curr = i & 1; - const unsigned next = curr ^ 1; - tmp[next] = cache_rd(data, (ea & ~0x0f) + (next * 16)); - - dst[i] = si_or((qword) spu_slqwbyte(tmp[curr], shift), - (qword) spu_rlmaskqwbyte(tmp[next], shift - 16)); + /* Please exercise extreme caution when modifying this code. This code + * must not read past the end of the page containing the source data, + * and it must not write more than ((size + 15) / 16) qwords to the + * destination buffer. + */ + ea &= ~0x0f; + hi = cache_rd(data, ea); + for (i = 16; i < read_size; i += 16) { + qword lo = cache_rd(data, ea + i); + + *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), + (qword) spu_rlmaskqwbyte(lo, shift - 16)); + hi = lo; } - if (i < num_entries) { - dst[i] = si_or((qword) spu_slqwbyte(tmp[(i & 1)], shift), - si_il(0)); + if (dst != last_write) { + *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), si_il(0)); } } + + ASSERT((ea + i) == last_read); + ASSERT(dst == last_write); } diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index cf81bee8fde..1560c0f1574 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -50,8 +50,6 @@ * Brian Paul */ -#include <libmisc.h> -#include <spu_mfcio.h> #include <transpose_matrix4x4.h> #include <simdmath/ceilf4.h> #include <simdmath/cosf4.h> @@ -151,6 +149,7 @@ spu_exec_machine_init(struct spu_exec_machine *mach, const qword zero = si_il(0); const qword not_zero = si_il(~0); + (void) numSamplers; mach->Samplers = samplers; mach->Processor = processor; mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; @@ -659,9 +658,10 @@ fetch_texel( struct spu_sampler *sampler, qword rgba[4]; qword out[4]; - sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, (float *) rgba); + sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, + (float (*)[4]) rgba); - _transpose_matrix4x4(out, rgba); + _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba); r->q = out[0]; g->q = out[1]; b->q = out[2]; diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 1136dba62d5..cc4bafdb3ac 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -38,6 +38,7 @@ #include "spu_tile.h" //#include "spu_test.h" #include "spu_vertex_shader.h" +#include "spu_dcache.h" #include "cell/common.h" #include "pipe/p_defines.h" @@ -434,7 +435,7 @@ cmd_batch(uint opcode) pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); break; case CELL_CMD_STATE_UNIFORMS: - draw.constants = (float (*)[4]) (uintptr_t) buffer[pos + 1]; + draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1]; pos += 2; break; case CELL_CMD_STATE_VS_ARRAY_INFO: @@ -583,7 +584,7 @@ main(main_param_t speid, main_param_t argp) one_time_init(); if (Debug) - printf("SPU: main() speid=%lu\n", speid); + printf("SPU: main() speid=%lu\n", (unsigned long) speid); mfc_get(&spu.init, /* dest */ (unsigned int) argp, /* src */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 5c95d112ac1..d14f1abbe74 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -131,7 +131,10 @@ extern boolean Debug; #define TAG_BATCH_BUFFER 17 #define TAG_MISC 18 #define TAG_TEXTURE_TILE 19 -#define TAG_INSTRUCTION_FETCH 20 +#define TAG_DCACHE0 20 +#define TAG_DCACHE1 21 +#define TAG_DCACHE2 22 +#define TAG_DCACHE3 23 diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index f7e4e653e31..219fd90cc0e 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -32,8 +32,6 @@ * Ian Romanick <[email protected]> */ -#include <spu_mfcio.h> - #include "pipe/p_util.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index acfa3494397..c3955bbd2dd 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -298,10 +298,12 @@ struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys, i915_init_string_functions(i915); i915_init_texture_functions(i915); + draw_install_aaline_stage(i915->draw, &i915->pipe); + draw_install_aapoint_stage(i915->draw, &i915->pipe); + i915->pci_id = pci_id; i915->flags.is_i945 = is_i945; - i915->dirty = ~0; i915->hardware_dirty = ~0; diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 2d876925b2c..d32dded6bdc 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -79,6 +79,40 @@ #define I915_MAX_CONSTANT 32 +/** See constant_flags[] below */ +#define I915_CONSTFLAG_USER 0x1f + + +/** + * Subclass of pipe_shader_state + */ +struct i915_fragment_shader +{ + struct pipe_shader_state state; + uint *program; + uint program_len; + + /** + * constants introduced during translation. + * These are placed at the end of the constant buffer and grow toward + * the beginning (eg: slot 31, 30 29, ...) + * User-provided constants start at 0. + * This allows both types of constants to co-exist (until there's too many) + * and doesn't require regenerating/changing the fragment program to + * shuffle constants around. + */ + uint num_constants; + float constants[I915_MAX_CONSTANT][4]; + + /** + * Status of each constant + * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding + * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) + * Else, the bitmask indicates which components are occupied by immediates. + */ + ubyte constant_flags[I915_MAX_CONSTANT]; +}; + struct i915_cache_context; @@ -93,11 +127,6 @@ struct i915_state float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; /** number of constants passed in through a constant buffer */ uint num_user_constants[PIPE_SHADER_TYPES]; - /** user constants, plus extra constants from shader translation */ - uint num_constants[PIPE_SHADER_TYPES]; - - uint *program; - uint program_len; /* texture sampler state */ unsigned sampler[I915_TEX_UNITS][3]; @@ -187,7 +216,8 @@ struct i915_context const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; const struct i915_depth_stencil_state *depth_stencil; const struct i915_rasterizer_state *rasterizer; - const struct pipe_shader_state *fs; + + struct i915_fragment_shader *fs; struct pipe_blend_color blend_color; struct pipe_clip_state clip; @@ -233,6 +263,7 @@ struct i915_context #define I915_NEW_TEXTURE 0x800 #define I915_NEW_CONSTANTS 0x1000 #define I915_NEW_VBO 0x2000 +#define I915_NEW_VS 0x4000 /* Driver's internally generated state flags: diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c index 3c2069b8273..96a54281f11 100644 --- a/src/gallium/drivers/i915simple/i915_flush.c +++ b/src/gallium/drivers/i915simple/i915_flush.c @@ -31,6 +31,7 @@ #include "pipe/p_defines.h" +#include "draw/draw_context.h" #include "i915_context.h" #include "i915_reg.h" #include "i915_batch.h" @@ -44,6 +45,8 @@ static void i915_flush( struct pipe_context *pipe, { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + /* Do we need to emit an MI_FLUSH command to flush the hardware * caches? */ diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h index 8c7b68aefb5..250dfe6dbf0 100644 --- a/src/gallium/drivers/i915simple/i915_fpc.h +++ b/src/gallium/drivers/i915simple/i915_fpc.h @@ -44,9 +44,16 @@ * Program translation state */ struct i915_fp_compile { - const struct pipe_shader_state *shader; + struct i915_fragment_shader *shader; /* the shader we're compiling */ - struct vertex_info *vertex_info; + boolean used_constants[I915_MAX_CONSTANT]; + + /** maps TGSI immediate index to constant slot */ + uint num_immediates; + uint immediates_map[I915_MAX_CONSTANT]; + float immediates[I915_MAX_CONSTANT][4]; + + boolean first_instruction; uint declarations[I915_PROGRAM_SIZE]; uint program[I915_PROGRAM_SIZE]; @@ -57,11 +64,6 @@ struct i915_fp_compile { uint output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; uint output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; - /** points into the i915->current.constants array: */ - float (*constants)[4]; - uint num_constants; - uint constant_flags[I915_MAX_CONSTANT]; /**< status of each constant */ - uint *csr; /**< Cursor, points into program. */ uint *decl; /**< Cursor, points into declarations. */ @@ -155,7 +157,9 @@ swizzle(int reg, uint x, uint y, uint z, uint w) /*********************************************************************** * Public interface for the compiler */ -extern void i915_translate_fragment_program( struct i915_context *i915 ); +extern void +i915_translate_fragment_program( struct i915_context *i915, + struct i915_fragment_shader *fs); @@ -206,8 +210,5 @@ extern void i915_disassemble_program(const uint * program, uint sz); extern void i915_program_error(struct i915_fp_compile *p, const char *msg, ...); -extern void -i915_translate_fragment_program(struct i915_context *i915); - #endif diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c index 74924ff0a1d..4bdeefb449b 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_emit.c +++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c @@ -61,8 +61,6 @@ (REG_NR_MASK << UREG_NR_SHIFT)) -#define I915_CONSTFLAG_PARAM 0x1f - uint i915_get_temp(struct i915_fp_compile *p) { @@ -73,10 +71,21 @@ i915_get_temp(struct i915_fp_compile *p) } p->temp_flag |= 1 << (bit - 1); - return UREG(REG_TYPE_R, (bit - 1)); + return bit - 1; +} + + +static void +i915_release_temp(struct i915_fp_compile *p, int reg) +{ + p->temp_flag &= ~(1 << reg); } +/** + * Get unpreserved temporary, a temp whose value is not preserved between + * PS program phases. + */ uint i915_get_utemp(struct i915_fp_compile * p) { @@ -185,41 +194,62 @@ i915_emit_arith(struct i915_fp_compile * p, return dest; } + +/** + * Emit a texture load or texkill instruction. + * \param dest the dest i915 register + * \param destmask the dest register writemask + * \param sampler the i915 sampler register + * \param coord the i915 source texcoord operand + * \param opcode the instruction opcode + */ uint i915_emit_texld( struct i915_fp_compile *p, uint dest, uint destmask, uint sampler, uint coord, - uint op ) + uint opcode ) { - uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); + const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); + int temp = -1; + if (coord != k) { - /* No real way to work around this in the general case - need to - * allocate and declare a new temporary register (a utemp won't - * do). Will fallback for now. + /* texcoord is swizzled or negated. Need to allocate a new temporary + * register (a utemp / unpreserved temp) won't do. */ - i915_program_error(p, "Can't (yet) swizzle TEX arguments"); - assert(0); - return 0; + uint tempReg; + + temp = i915_get_temp(p); /* get temp reg index */ + tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */ + + i915_emit_arith( p, A0_MOV, + tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */ + 0, /* saturate */ + coord, 0, 0 ); /* src0, src1, src2 */ + + /* new src texcoord is tempReg */ + coord = tempReg; } /* Don't worry about saturate as we only support */ if (destmask != A0_DEST_CHANNEL_ALL) { + /* if not writing to XYZW... */ uint tmp = i915_get_utemp(p); - i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op ); + i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode ); i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); - return dest; + /* XXX release utemp here? */ } else { assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); + /* is the sampler coord a texcoord input reg? */ if (GET_UREG_TYPE(coord) != REG_TYPE_T) { p->nr_tex_indirect++; } - *(p->csr++) = (op | + *(p->csr++) = (opcode | T0_DEST( dest ) | T0_SAMPLER( sampler )); @@ -227,14 +257,19 @@ uint i915_emit_texld( struct i915_fp_compile *p, *(p->csr++) = T2_MBZ; p->nr_tex_insn++; - return dest; } + + if (temp >= 0) + i915_release_temp(p, temp); + + return dest; } uint i915_emit_const1f(struct i915_fp_compile * p, float c0) { + struct i915_fragment_shader *ifs = p->shader; unsigned reg, idx; if (c0 == 0.0) @@ -243,15 +278,15 @@ i915_emit_const1f(struct i915_fp_compile * p, float c0) return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { - if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM) + if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER) continue; for (idx = 0; idx < 4; idx++) { - if (!(p->constant_flags[reg] & (1 << idx)) || - p->constants[reg][idx] == c0) { - p->constants[reg][idx] = c0; - p->constant_flags[reg] |= 1 << idx; - if (reg + 1 > p->num_constants) - p->num_constants = reg + 1; + if (!(ifs->constant_flags[reg] & (1 << idx)) || + ifs->constants[reg][idx] == c0) { + ifs->constants[reg][idx] = c0; + ifs->constant_flags[reg] |= 1 << idx; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); } } @@ -264,6 +299,7 @@ i915_emit_const1f(struct i915_fp_compile * p, float c0) uint i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) { + struct i915_fragment_shader *ifs = p->shader; unsigned reg, idx; if (c0 == 0.0) @@ -277,16 +313,16 @@ i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { - if (p->constant_flags[reg] == 0xf || - p->constant_flags[reg] == I915_CONSTFLAG_PARAM) + if (ifs->constant_flags[reg] == 0xf || + ifs->constant_flags[reg] == I915_CONSTFLAG_USER) continue; for (idx = 0; idx < 3; idx++) { - if (!(p->constant_flags[reg] & (3 << idx))) { - p->constants[reg][idx + 0] = c0; - p->constants[reg][idx + 1] = c1; - p->constant_flags[reg] |= 3 << idx; - if (reg + 1 > p->num_constants) - p->num_constants = reg + 1; + if (!(ifs->constant_flags[reg] & (3 << idx))) { + ifs->constants[reg][idx + 0] = c0; + ifs->constants[reg][idx + 1] = c1; + ifs->constant_flags[reg] |= 3 << idx; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); } } @@ -302,25 +338,26 @@ uint i915_emit_const4f(struct i915_fp_compile * p, float c0, float c1, float c2, float c3) { + struct i915_fragment_shader *ifs = p->shader; unsigned reg; for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { - if (p->constant_flags[reg] == 0xf && - p->constants[reg][0] == c0 && - p->constants[reg][1] == c1 && - p->constants[reg][2] == c2 && - p->constants[reg][3] == c3) { + if (ifs->constant_flags[reg] == 0xf && + ifs->constants[reg][0] == c0 && + ifs->constants[reg][1] == c1 && + ifs->constants[reg][2] == c2 && + ifs->constants[reg][3] == c3) { return UREG(REG_TYPE_CONST, reg); } - else if (p->constant_flags[reg] == 0) { - - p->constants[reg][0] = c0; - p->constants[reg][1] = c1; - p->constants[reg][2] = c2; - p->constants[reg][3] = c3; - p->constant_flags[reg] = 0xf; - if (reg + 1 > p->num_constants) - p->num_constants = reg + 1; + else if (ifs->constant_flags[reg] == 0) { + + ifs->constants[reg][0] = c0; + ifs->constants[reg][1] = c1; + ifs->constants[reg][2] = c2; + ifs->constants[reg][3] = c3; + ifs->constant_flags[reg] = 0xf; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; return UREG(REG_TYPE_CONST, reg); } } @@ -335,41 +372,3 @@ i915_emit_const4fv(struct i915_fp_compile * p, const float * c) { return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); } - - -#if 00000/*UNUSED*/ -/* Reserve a slot in the constant file for a Mesa state parameter. - * These will later need to be tracked on statechanges, but that is - * done elsewhere. - */ -uint -i915_emit_param4fv(struct i915_fp_compile * p, const float * values) -{ - struct i915_fragment_program *fp = p->fp; - int i; - - for (i = 0; i < fp->nr_params; i++) { - if (fp->param[i].values == values) - return UREG(REG_TYPE_CONST, fp->param[i].reg); - } - - if (p->constants->nr_constants == I915_MAX_CONSTANT || - fp->nr_params == I915_MAX_CONSTANT) { - i915_program_error(p, "i915_emit_param4fv: out of constants\n"); - return 0; - } - - { - int reg = p->constants->nr_constants++; - int i = fp->nr_params++; - - assert (p->constant_flags[reg] == 0); - p->constant_flags[reg] = I915_CONSTFLAG_PARAM; - - fp->param[i].values = values; - fp->param[i].reg = reg; - - return UREG(REG_TYPE_CONST, reg); - } -} -#endif diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 6c1524c768e..76a2184e9ab 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -34,6 +34,7 @@ #include "pipe/p_shader_tokens.h" #include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_dump.h" #include "draw/draw_vertex.h" @@ -97,19 +98,19 @@ negate(int reg, int x, int y, int z, int w) } +/** + * In the event of a translation failure, we'll generate a simple color + * pass-through program. + */ static void -i915_use_passthrough_shader(struct i915_context *i915) +i915_use_passthrough_shader(struct i915_fragment_shader *fs) { - debug_printf("**** Using i915 pass-through fragment shader\n"); - - i915->current.program = (uint *) MALLOC(sizeof(passthrough)); - if (i915->current.program) { - memcpy(i915->current.program, passthrough, sizeof(passthrough)); - i915->current.program_len = Elements(passthrough); + fs->program = (uint *) MALLOC(sizeof(passthrough)); + if (fs->program) { + memcpy(fs->program, passthrough, sizeof(passthrough)); + fs->program_len = Elements(passthrough); } - - i915->current.num_constants[PIPE_SHADER_FRAGMENT] = 0; - i915->current.num_user_constants[PIPE_SHADER_FRAGMENT] = 0; + fs->num_constants = 0; } @@ -161,9 +162,6 @@ src_vector(struct i915_fp_compile *p, * We also use a texture coordinate to pass wpos when possible. */ - /* use vertex format info to map a slot number to a VF attrib */ - assert(index < p->vertex_info->num_attribs); - sem_name = p->input_semantic_name[index]; sem_ind = p->input_semantic_index[index]; @@ -201,7 +199,8 @@ src_vector(struct i915_fp_compile *p, break; case TGSI_FILE_IMMEDIATE: - /* XXX unfinished - need to append immediates onto const buffer */ + assert(index < p->num_immediates); + index = p->immediates_map[index]; /* fall-through */ case TGSI_FILE_CONSTANT: src = UREG(REG_TYPE_CONST, index); @@ -386,6 +385,26 @@ emit_simple_arith(struct i915_fp_compile *p, arg3 ); } + +/** As above, but swap the first two src regs */ +static void +emit_simple_arith_swap2(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode, uint numArgs) +{ + struct tgsi_full_instruction inst2; + + assert(numArgs == 2); + + /* transpose first two registers */ + inst2 = *inst; + inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1]; + inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0]; + + emit_simple_arith(p, &inst2, opcode, numArgs); +} + + #ifndef M_PI #define M_PI 3.14159265358979323846 #endif @@ -556,8 +575,12 @@ i915_translate_instruction(struct i915_fp_compile *p, src0 = src_vector(p, &inst->FullSrcRegisters[0]); tmp = i915_get_utemp(p); - i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ - 0, src0, T0_TEXKILL); + i915_emit_texld(p, + tmp, /* dest reg: a dummy reg */ + A0_DEST_CHANNEL_ALL, /* dest writemask */ + 0, /* sampler */ + src0, /* coord*/ + T0_TEXKILL); /* opcode */ break; case TGSI_OPCODE_LG2: @@ -773,6 +796,11 @@ i915_translate_instruction(struct i915_fp_compile *p, emit_simple_arith(p, inst, A0_SGE, 2); break; + case TGSI_OPCODE_SLE: + /* like SGE, but swap reg0, reg1 */ + emit_simple_arith_swap2(p, inst, A0_SGE, 2); + break; + case TGSI_OPCODE_SIN: src0 = src_vector(p, &inst->FullSrcRegisters[0]); tmp = i915_get_utemp(p); @@ -827,6 +855,11 @@ i915_translate_instruction(struct i915_fp_compile *p, emit_simple_arith(p, inst, A0_SLT, 2); break; + case TGSI_OPCODE_SGT: + /* like SLT, but swap reg0, reg1 */ + emit_simple_arith_swap2(p, inst, A0_SLT, 2); + break; + case TGSI_OPCODE_SUB: src0 = src_vector(p, &inst->FullSrcRegisters[0]); src1 = src_vector(p, &inst->FullSrcRegisters[1]); @@ -880,6 +913,7 @@ i915_translate_instruction(struct i915_fp_compile *p, default: i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); + p->error = 1; return; } @@ -896,6 +930,7 @@ static void i915_translate_instructions(struct i915_fp_compile *p, const struct tgsi_token *tokens) { + struct i915_fragment_shader *ifs = p->shader; struct tgsi_parse_context parse; tgsi_parse_init( &parse, tokens ); @@ -928,13 +963,64 @@ i915_translate_instructions(struct i915_fp_compile *p, p->output_semantic_name[ind] = sem; p->output_semantic_index[ind] = semi; } + else if (parse.FullToken.FullDeclaration.Declaration.File + == TGSI_FILE_CONSTANT) { + uint i; + for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + i++) { + assert(ifs->constant_flags[i] == 0x0); + ifs->constant_flags[i] = I915_CONSTFLAG_USER; + ifs->num_constants = MAX2(ifs->num_constants, i + 1); + } + } + else if (parse.FullToken.FullDeclaration.Declaration.File + == TGSI_FILE_TEMPORARY) { + uint i; + for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + i++) { + assert(i < I915_MAX_TEMPORARY); + p->temp_flag |= (1 << i); /* mark temp as used */ + } + } break; case TGSI_TOKEN_TYPE_IMMEDIATE: - /* XXX append the immediate to the const buffer... */ + { + const struct tgsi_full_immediate *imm + = &parse.FullToken.FullImmediate; + const uint pos = p->num_immediates++; + uint j; + for (j = 0; j < imm->Immediate.Size; j++) { + p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float; + } + } break; case TGSI_TOKEN_TYPE_INSTRUCTION: + if (p->first_instruction) { + /* resolve location of immediates */ + uint i, j; + for (i = 0; i < p->num_immediates; i++) { + /* find constant slot for this immediate */ + for (j = 0; j < I915_MAX_CONSTANT; j++) { + if (ifs->constant_flags[j] == 0x0) { + memcpy(ifs->constants[j], + p->immediates[i], + 4 * sizeof(float)); + /*printf("immediate %d maps to const %d\n", i, j);*/ + ifs->constant_flags[j] = 0xf; /* all four comps used */ + p->immediates_map[i] = j; + ifs->num_constants = MAX2(ifs->num_constants, j + 1); + break; + } + } + } + + p->first_instruction = FALSE; + } + i915_translate_instruction(p, &parse.FullToken.FullInstruction); break; @@ -950,32 +1036,33 @@ i915_translate_instructions(struct i915_fp_compile *p, static struct i915_fp_compile * i915_init_compile(struct i915_context *i915, - const struct pipe_shader_state *fs) + struct i915_fragment_shader *ifs) { struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); - p->shader = i915->fs; + p->shader = ifs; - p->vertex_info = &i915->current.vertex_info; - - /* new constants found during translation get appended after the - * user-provided constants. + /* Put new constants at end of const buffer, growing downward. + * The problem is we don't know how many user-defined constants might + * be specified with pipe->set_constant_buffer(). + * Should pre-scan the user's program to determine the highest-numbered + * constant referenced. */ - p->constants = i915->current.constants[PIPE_SHADER_FRAGMENT]; - p->num_constants = i915->current.num_user_constants[PIPE_SHADER_FRAGMENT]; + ifs->num_constants = 0; + memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); + + p->first_instruction = TRUE; p->nr_tex_indirect = 1; /* correct? */ p->nr_tex_insn = 0; p->nr_alu_insn = 0; p->nr_decl_insn = 0; - memset(p->constant_flags, 0, sizeof(p->constant_flags)); - p->csr = p->program; p->decl = p->declarations; p->decl_s = 0; p->decl_t = 0; - p->temp_flag = 0xffff000; + p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; p->utemp_flag = ~0x7; p->wpos_tex = -1; @@ -993,6 +1080,7 @@ i915_init_compile(struct i915_context *i915, static void i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) { + struct i915_fragment_shader *ifs = p->shader; unsigned long program_size = (unsigned long) (p->csr - p->program); unsigned long decl_size = (unsigned long) (p->decl - p->declarations); @@ -1008,19 +1096,13 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) if (p->nr_decl_insn > I915_MAX_DECL_INSN) i915_program_error(p, "Exceeded max DECL instructions"); - /* free old program, if present */ - if (i915->current.program) { - FREE(i915->current.program); - i915->current.program_len = 0; - } - if (p->error) { p->NumNativeInstructions = 0; p->NumNativeAluInstructions = 0; p->NumNativeTexInstructions = 0; p->NumNativeTexIndirections = 0; - i915_use_passthrough_shader(i915); + i915_use_passthrough_shader(ifs); } else { p->NumNativeInstructions @@ -1034,24 +1116,20 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) /* Copy compilation results to fragment program struct: */ - i915->current.program + assert(!ifs->program); + ifs->program = (uint *) MALLOC((program_size + decl_size) * sizeof(uint)); - if (i915->current.program) { - i915->current.program_len = program_size + decl_size; + if (ifs->program) { + ifs->program_len = program_size + decl_size; - memcpy(i915->current.program, + memcpy(ifs->program, p->declarations, decl_size * sizeof(uint)); - memcpy(i915->current.program + decl_size, + memcpy(ifs->program + decl_size, p->program, program_size * sizeof(uint)); } - - /* update number of constants */ - i915->current.num_constants[PIPE_SHADER_FRAGMENT] = p->num_constants; - assert(i915->current.num_constants[PIPE_SHADER_FRAGMENT] - >= i915->current.num_user_constants[PIPE_SHADER_FRAGMENT]); } /* Release the compilation struct: @@ -1085,7 +1163,7 @@ i915_find_wpos_space(struct i915_fp_compile *p) i915_program_error(p, "No free texcoord for wpos value"); } #else - if (p->shader->input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + if (p->shader->state.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { /* frag shader using the fragment position input */ #if 0 assert(0); @@ -1106,7 +1184,7 @@ static void i915_fixup_depth_write(struct i915_fp_compile *p) { /* XXX assuming pos/depth is always in output[0] */ - if (p->shader->output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + if (p->shader->state.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { const uint depth = UREG(REG_TYPE_OD, 0); i915_emit_arith(p, @@ -1121,13 +1199,18 @@ i915_fixup_depth_write(struct i915_fp_compile *p) void -i915_translate_fragment_program( struct i915_context *i915 ) +i915_translate_fragment_program( struct i915_context *i915, + struct i915_fragment_shader *fs) { - struct i915_fp_compile *p = i915_init_compile(i915, i915->fs); - const struct tgsi_token *tokens = i915->fs->tokens; + struct i915_fp_compile *p = i915_init_compile(i915, fs); + const struct tgsi_token *tokens = fs->state.tokens; i915_find_wpos_space(p); +#if 0 + tgsi_dump(tokens, 0); +#endif + i915_translate_instructions(p, tokens); i915_fixup_depth_write(p); diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index 44c43259369..d8de5178f60 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -72,38 +72,42 @@ emit_hw_vertex( struct i915_context *i915, uint i; uint count = 0; /* for debug/sanity */ + assert(!i915->dirty); + for (i = 0; i < vinfo->num_attribs; i++) { + const uint j = vinfo->src_index[i]; + const float *attrib = vertex->data[j]; switch (vinfo->emit[i]) { case EMIT_OMIT: /* no-op */ break; case EMIT_1F: - OUT_BATCH( fui(vertex->data[i][0]) ); + OUT_BATCH( fui(attrib[0]) ); count++; break; case EMIT_2F: - OUT_BATCH( fui(vertex->data[i][0]) ); - OUT_BATCH( fui(vertex->data[i][1]) ); + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); count += 2; break; case EMIT_3F: - OUT_BATCH( fui(vertex->data[i][0]) ); - OUT_BATCH( fui(vertex->data[i][1]) ); - OUT_BATCH( fui(vertex->data[i][2]) ); + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); + OUT_BATCH( fui(attrib[2]) ); count += 3; break; case EMIT_4F: - OUT_BATCH( fui(vertex->data[i][0]) ); - OUT_BATCH( fui(vertex->data[i][1]) ); - OUT_BATCH( fui(vertex->data[i][2]) ); - OUT_BATCH( fui(vertex->data[i][3]) ); + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); + OUT_BATCH( fui(attrib[2]) ); + OUT_BATCH( fui(attrib[3]) ); count += 4; break; case EMIT_4UB: - OUT_BATCH( pack_ub4(float_to_ubyte( vertex->data[i][2] ), - float_to_ubyte( vertex->data[i][1] ), - float_to_ubyte( vertex->data[i][0] ), - float_to_ubyte( vertex->data[i][3] )) ); + OUT_BATCH( pack_ub4(float_to_ubyte( attrib[2] ), + float_to_ubyte( attrib[1] ), + float_to_ubyte( attrib[0] ), + float_to_ubyte( attrib[3] )) ); count += 1; break; default: @@ -122,17 +126,19 @@ emit_prim( struct draw_stage *stage, unsigned nr ) { struct i915_context *i915 = setup_stage(stage)->i915; - unsigned vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ + unsigned vertex_size; unsigned i; - assert(vertex_size >= 12); /* never smaller than 12 bytes */ - if (i915->dirty) i915_update_derived( i915 ); if (i915->hardware_dirty) i915_emit_hardware_state( i915 ); + /* need to do this after validation! */ + vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ + assert(vertex_size >= 12); /* never smaller than 12 bytes */ + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { FLUSH_BATCH(); diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index c5bf6174f68..9d5f609220a 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -83,6 +83,12 @@ i915_vbuf_render_get_vertex_info( struct vbuf_render *render ) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; + + if (i915->dirty) { + /* make sure we have up to date vertex layout */ + i915_update_derived( i915 ); + } + return &i915->current.vertex_info; } @@ -143,7 +149,8 @@ i915_vbuf_render_draw( struct vbuf_render *render, assert(nr_indices); - assert((i915->dirty & ~I915_NEW_VBO) == 0); + /* this seems to be bogus, since we validate state right after this */ + /*assert((i915->dirty & ~I915_NEW_VBO) == 0);*/ if (i915->dirty) i915_update_derived( i915 ); diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index e055eed7e02..a35bdf941fc 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -38,6 +38,7 @@ #include "i915_reg.h" #include "i915_state.h" #include "i915_state_inlines.h" +#include "i915_fpc.h" /* The i915 (and related graphics cores) do not support GL_CLAMP. The @@ -416,26 +417,47 @@ static void i915_set_polygon_stipple( struct pipe_context *pipe, } -static void * i915_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *templ) + +static void * +i915_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) { - return 0; + struct i915_context *i915 = i915_context(pipe); + struct i915_fragment_shader *ifs = CALLOC_STRUCT(i915_fragment_shader); + if (!ifs) + return NULL; + + ifs->state = *templ; + + /* The shader's compiled to i915 instructions here */ + i915_translate_fragment_program(i915, ifs); + + return ifs; } -static void i915_bind_fs_state(struct pipe_context *pipe, void *fs) +static void +i915_bind_fs_state(struct pipe_context *pipe, void *shader) { struct i915_context *i915 = i915_context(pipe); - i915->fs = (struct pipe_shader_state *)fs; + i915->fs = (struct i915_fragment_shader*) shader; i915->dirty |= I915_NEW_FS; } -static void i915_delete_fs_state(struct pipe_context *pipe, void *shader) +static +void i915_delete_fs_state(struct pipe_context *pipe, void *shader) { - /*do nothing*/ + struct i915_fragment_shader *ifs = (struct i915_fragment_shader *) shader; + + if (ifs->program) + FREE(ifs->program); + ifs->program_len = 0; + + FREE(ifs); } + static void * i915_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) @@ -452,6 +474,8 @@ static void i915_bind_vs_state(struct pipe_context *pipe, void *shader) /* just pass-through to draw module */ draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); + + i915->dirty |= I915_NEW_VS; } static void i915_delete_vs_state(struct pipe_context *pipe, void *shader) diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index 4767584fc60..5cf70acdf3b 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -27,104 +27,111 @@ #include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" #include "i915_context.h" #include "i915_state.h" #include "i915_reg.h" #include "i915_fpc.h" -#include "pipe/p_shader_tokens.h" + /** - * Determine which post-transform / pre-rasterization vertex attributes - * we need. - * Derived from: fs, setup states. + * Determine the hardware vertex layout. + * Depends on vertex/fragment shader state. */ static void calculate_vertex_layout( struct i915_context *i915 ) { - const struct pipe_shader_state *fs = i915->fs; + const struct pipe_shader_state *fs = &i915->fs->state; const enum interp_mode colorInterp = i915->rasterizer->color_interp; struct vertex_info vinfo; - uint front0 = 0, back0 = 0, front1 = 0, back1 = 0; - boolean needW = 0; + boolean texCoords[8], colors[2], fog, needW; uint i; - boolean texCoords[8]; - uint src = 0; + int src; memset(texCoords, 0, sizeof(texCoords)); + colors[0] = colors[1] = fog = needW = FALSE; memset(&vinfo, 0, sizeof(vinfo)); - /* pos */ - draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src++); - /* Note: we'll set the S4_VFMT_XYZ[W] bits below */ - + /* Determine which fragment program inputs are needed. Setup HW vertex + * layout below, in the HW-specific attribute order. + */ for (i = 0; i < fs->num_inputs; i++) { switch (fs->input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: break; case TGSI_SEMANTIC_COLOR: - if (fs->input_semantic_index[i] == 0) { - front0 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++); - vinfo.hwfmt[0] |= S4_VFMT_COLOR; - } - else { - assert(fs->input_semantic_index[i] == 1); - front1 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++); - vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; - } + assert(fs->input_semantic_index[i] < 2); + colors[fs->input_semantic_index[i]] = TRUE; break; case TGSI_SEMANTIC_GENERIC: /* usually a texcoord */ { const uint unit = fs->input_semantic_index[i]; - uint hwtc; + assert(unit < 8); texCoords[unit] = TRUE; - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); - hwtc = TEXCOORDFMT_4D; needW = TRUE; - vinfo.hwfmt[1] |= hwtc << (unit * 4); } break; case TGSI_SEMANTIC_FOG: - debug_printf("i915 fogcoord not implemented yet\n"); - draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src++); + fog = TRUE; break; default: assert(0); } - } - /* finish up texcoord fields */ - for (i = 0; i < 8; i++) { - if (!texCoords[i]) { - const uint hwtc = TEXCOORDFMT_NOT_PRESENT; - vinfo.hwfmt[1] |= hwtc << (i* 4); - } - } - - /* go back and fill in the vertex position info now that we have needW */ + + /* pos */ + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); if (needW) { + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZW; vinfo.emit[0] = EMIT_4F; } else { + draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZ; vinfo.emit[0] = EMIT_3F; } - /* Additional attributes required for setup: Just twosided - * lighting. Edgeflag is dealt with specially by setting bits in - * the vertex header. - */ - if (i915->rasterizer->light_twoside) { - if (front0) { - back0 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++); + /* hardware point size */ + /* XXX todo */ + + /* primary color */ + if (colors[0]) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); + draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); + vinfo.hwfmt[0] |= S4_VFMT_COLOR; + } + + /* secondary color */ + if (colors[1]) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); + draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); + vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; + } + + /* fog coord, not fog blend factor */ + if (fog) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0); + draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM; + } + + /* texcoords */ + for (i = 0; i < 8; i++) { + uint hwtc; + if (texCoords[i]) { + hwtc = TEXCOORDFMT_4D; + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); } - if (back0) { - back1 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++); + else { + hwtc = TEXCOORDFMT_NOT_PRESENT; } + vinfo.hwfmt[1] |= hwtc << (i * 4); } draw_compute_vertex_size(&vinfo); @@ -148,7 +155,7 @@ static void calculate_vertex_layout( struct i915_context *i915 ) */ void i915_update_derived( struct i915_context *i915 ) { - if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS)) + if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS)) calculate_vertex_layout( i915 ); if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_TEXTURE)) @@ -164,7 +171,6 @@ void i915_update_derived( struct i915_context *i915 ) i915_update_dynamic( i915 ); if (i915->dirty & I915_NEW_FS) { - i915_translate_fragment_program(i915); i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */ } diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index 3339287f498..6bbaac4e34c 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -99,7 +99,11 @@ i915_emit_hardware_state(struct i915_context *i915 ) 2 + I915_TEX_UNITS*3 + 2 + I915_TEX_UNITS*3 + 2 + I915_MAX_CONSTANT*4 + +#if 0 i915->current.program_len + +#else + i915->fs->program_len + +#endif 6 ) * 3/2; /* plus 50% margin */ const unsigned relocs = ( I915_TEX_UNITS + @@ -325,15 +329,34 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ if (i915->hardware_dirty & I915_HW_PROGRAM) { - const uint nr = i915->current.num_constants[PIPE_SHADER_FRAGMENT]; - assert(nr <= I915_MAX_CONSTANT); - if (nr > 0) { - const uint *c - = (const uint *) i915->current.constants[PIPE_SHADER_FRAGMENT]; + /* Collate the user-defined constants with the fragment shader's + * immediates according to the constant_flags[] array. + */ + const uint nr = i915->fs->num_constants; + if (nr) { uint i; + OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) ); + for (i = 0; i < nr; i++) { + const uint *c; + if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { + /* grab user-defined constant */ + c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i]; + } + else { + /* emit program constant */ + c = (uint *) i915->fs->constants[i]; + } +#if 0 /* debug */ + { + float *f = (float *) c; + printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], + (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER + ? "user" : "immediate")); + } +#endif OUT_BATCH(*c++); OUT_BATCH(*c++); OUT_BATCH(*c++); @@ -348,9 +371,9 @@ i915_emit_hardware_state(struct i915_context *i915 ) { uint i; /* we should always have, at least, a pass-through program */ - assert(i915->current.program_len > 0); - for (i = 0; i < i915->current.program_len; i++) { - OUT_BATCH(i915->current.program[i]); + assert(i915->fs->program_len > 0); + for (i = 0; i < i915->fs->program_len; i++) { + OUT_BATCH(i915->fs->program[i]); } } diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index d581ee8d3ca..4c1a6d5df0b 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -5,6 +5,9 @@ env = env.Clone() softpipe = env.ConvenienceLibrary( target = 'softpipe', source = [ + 'sp_fs_exec.c', + 'sp_fs_sse.c', + 'sp_fs_llvm.c', 'sp_clear.c', 'sp_context.c', 'sp_draw_arrays.c', diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 8cb0534342d..d5bd7a702f1 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -81,7 +81,7 @@ sp_setup_pos_vector(const struct tgsi_interp_coef *coef, static void -exec_prepare( struct sp_fragment_shader *base, +exec_prepare( const struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, struct tgsi_sampler *samplers ) { @@ -98,7 +98,7 @@ exec_prepare( struct sp_fragment_shader *base, * interface: */ static unsigned -exec_run( struct sp_fragment_shader *base, +exec_run( const struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, struct quad_header *quad ) { diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c index 22da4714533..34b2b7d4e24 100644 --- a/src/gallium/drivers/softpipe/sp_fs_llvm.c +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -146,7 +146,7 @@ shade_quad_llvm(struct quad_stage *qs, unsigned -run_llvm_fs( struct sp_fragment_shader *base, +run_llvm_fs( const struct sp_fragment_shader *base, struct foo *machine ) { } diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 7b1e131ee14..b6a3fddb29c 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -1165,6 +1165,10 @@ static void setup_begin( struct draw_stage *stage ) struct softpipe_context *sp = setup->softpipe; const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; + if (sp->dirty) { + softpipe_update_derived(sp); + } + setup->quad.nr_attrs = fs->num_inputs; sp->quad.first->begin(sp->quad.first); diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index 15b5594547d..142dbcc7710 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -56,11 +56,12 @@ sp_build_depth_stencil( void sp_build_quad_pipeline(struct softpipe_context *sp) { - boolean early_depth_test = + boolean early_depth_test = sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && - sp->fs->shader.output_semantic_name[0] != TGSI_SEMANTIC_POSITION; + !sp->fs->uses_kill && + !sp->fs->writes_z; /* build up the pipeline in reverse order... */ diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index ef8cf67d4c3..5aaa9e346bc 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -63,14 +63,17 @@ struct tgsi_exec_machine; struct sp_fragment_shader { struct pipe_shader_state shader; - void (*prepare)( struct sp_fragment_shader *shader, + boolean uses_kill; + boolean writes_z; + + void (*prepare)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct tgsi_sampler *samplers); /* Run the shader - this interface will get cleaned up in the * future: */ - unsigned (*run)( struct sp_fragment_shader *shader, + unsigned (*run)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct quad_header *quad ); diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index f9f2c5eaa8f..4c6313001f4 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -35,33 +35,6 @@ /** - * Search vertex program's outputs to find a match for the given - * semantic name/index. Return the index of the output slot. - * - * Return 0 if not found. This will cause the fragment program to use - * vertex attrib 0 (position) in the cases where the fragment program - * attempts to use a missing vertex program output. This is an undefined - * condition that users shouldn't hit anyway. - */ -static int -find_vs_output(struct softpipe_context *sp, - const struct pipe_shader_state *vs, - uint semantic_name, - uint semantic_index) -{ - uint i; - for (i = 0; i < vs->num_outputs; i++) { - if (vs->output_semantic_name[i] == semantic_name && - vs->output_semantic_index[i] == semantic_index) - return i; - } - - /* See if the draw module is introducing a new attribute... */ - return draw_find_vs_output(sp->draw, semantic_name, semantic_index); -} - - -/** * Mark the current vertex layout as "invalid". * We'll validate the vertex layout later, when we start to actually * render a point or line or tri. @@ -114,24 +87,25 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) int src; switch (fs->input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: - src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_vs_output(softpipe->draw, + TGSI_SEMANTIC_POSITION, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); break; case TGSI_SEMANTIC_COLOR: - src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_COLOR, + src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_COLOR, fs->input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; case TGSI_SEMANTIC_FOG: - src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_FOG, 0); + src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); break; case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ - src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_GENERIC, + src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC, fs->input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); break; @@ -141,7 +115,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) } } - softpipe->psize_slot = find_vs_output(softpipe, vs, TGSI_SEMANTIC_PSIZE, 0); + softpipe->psize_slot = draw_find_vs_output(softpipe->draw, + TGSI_SEMANTIC_PSIZE, 0); if (softpipe->psize_slot > 0) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, softpipe->psize_slot); diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index b0238f81737..b184ac61bb9 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -36,6 +36,7 @@ #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_scan.h" void * @@ -44,21 +45,24 @@ softpipe_create_fs_state(struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); struct sp_fragment_shader *state; + struct tgsi_shader_info info; + + tgsi_scan_shader(templ->tokens, &info); if (softpipe->dump_fs) tgsi_dump(templ->tokens, 0); state = softpipe_create_fs_llvm( softpipe, templ ); - if (state) - return state; - - state = softpipe_create_fs_sse( softpipe, templ ); - if (state) - return state; - - state = softpipe_create_fs_exec( softpipe, templ ); - + if (!state) { + state = softpipe_create_fs_sse( softpipe, templ ); + if (!state) { + state = softpipe_create_fs_exec( softpipe, templ ); + } + } assert(state); + state->uses_kill = (info.opcode_count[TGSI_OPCODE_KIL] || + info.opcode_count[TGSI_OPCODE_KILP]); + state->writes_z = info.writes_z; return state; } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 43d5085895f..0ced585c7f3 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -343,7 +343,7 @@ nearest_texcoord_unnorm(unsigned wrapMode, float s, unsigned size) switch (wrapMode) { case PIPE_TEX_WRAP_CLAMP: i = ifloor(s); - return CLAMP(i, 0, size-1); + return CLAMP(i, 0, (int) size-1); case PIPE_TEX_WRAP_CLAMP_TO_EDGE: /* fall-through */ case PIPE_TEX_WRAP_CLAMP_TO_BORDER: @@ -366,7 +366,7 @@ linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size, switch (wrapMode) { case PIPE_TEX_WRAP_CLAMP: /* Not exactly what the spec says, but it matches NVIDIA output */ - s = CLAMP(s - 0.5F, 0.0, (float) size - 1.0); + s = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f); *i0 = ifloor(s); *i1 = *i0 + 1; break; @@ -377,7 +377,7 @@ linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size, s -= 0.5F; *i0 = ifloor(s); *i1 = *i0 + 1; - if (*i1 > size - 1) + if (*i1 > (int) size - 1) *i1 = size - 1; break; default: |