summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorBen Skeggs <[email protected]>2008-02-27 00:34:31 +1100
committerBen Skeggs <[email protected]>2008-02-27 00:34:31 +1100
commit68ef52886263690632552ae187a4673945c2ab74 (patch)
tree0453c0063397c196ebe5e3dcd4d9c91392496d77 /src/gallium/drivers
parent026e2fd3c6eb87a010a9c90341e8a77b09376b5b (diff)
parentad6bb870de6103ed240fa1f9f828bd13a4401a9a (diff)
Merge branch 'upstream-gallium-0.1' into nouveau-gallium-0.1
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.h2
-rw-r--r--src/gallium/drivers/cell/ppu/cell_draw_arrays.c1
-rw-r--r--src/gallium/drivers/cell/ppu/cell_flush.h4
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vertex_shader.c6
-rw-r--r--src/gallium/drivers/cell/spu/spu_dcache.c52
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.c8
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.c5
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h5
-rw-r--r--src/gallium/drivers/cell/spu/spu_vertex_fetch.c2
-rw-r--r--src/gallium/drivers/i915simple/i915_context.c4
-rw-r--r--src/gallium/drivers/i915simple/i915_context.h43
-rw-r--r--src/gallium/drivers/i915simple/i915_flush.c3
-rw-r--r--src/gallium/drivers/i915simple/i915_fpc.h23
-rw-r--r--src/gallium/drivers/i915simple/i915_fpc_emit.c163
-rw-r--r--src/gallium/drivers/i915simple/i915_fpc_translate.c183
-rw-r--r--src/gallium/drivers/i915simple/i915_prim_emit.c40
-rw-r--r--src/gallium/drivers/i915simple/i915_prim_vbuf.c9
-rw-r--r--src/gallium/drivers/i915simple/i915_state.c38
-rw-r--r--src/gallium/drivers/i915simple/i915_state_derived.c104
-rw-r--r--src/gallium/drivers/i915simple/i915_state_emit.c39
-rw-r--r--src/gallium/drivers/softpipe/SConscript3
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_exec.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_llvm.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_setup.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_quad.c5
-rw-r--r--src/gallium/drivers/softpipe/sp_state.h7
-rw-r--r--src/gallium/drivers/softpipe/sp_state_derived.c39
-rw-r--r--src/gallium/drivers/softpipe/sp_state_fs.c22
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c6
29 files changed, 512 insertions, 314 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
index 3b687bb8689..1433a4925fa 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -134,6 +134,8 @@ extern void
cell_vertex_shader_queue_flush(struct draw_context *draw);
+/* XXX find a better home for this */
+extern void cell_update_vertex_fetch(struct draw_context *draw);
#endif /* CELL_CONTEXT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
index cbd387f0142..c839fb4d12d 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
@@ -38,6 +38,7 @@
#include "cell_context.h"
#include "cell_draw_arrays.h"
#include "cell_state.h"
+#include "cell_flush.h"
#include "draw/draw_context.h"
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h
index eda351b1cbc..7f940ae76b6 100644
--- a/src/gallium/drivers/cell/ppu/cell_flush.h
+++ b/src/gallium/drivers/cell/ppu/cell_flush.h
@@ -35,4 +35,8 @@ cell_flush(struct pipe_context *pipe, unsigned flags);
extern void
cell_flush_int(struct pipe_context *pipe, unsigned flags);
+extern void
+cell_flush_buffer_range(struct cell_context *cell, void *ptr,
+ unsigned size);
+
#endif
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
index f7ef72e5a2c..f5c27852c14 100644
--- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
+++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
@@ -55,7 +55,6 @@ cell_vertex_shader_queue_flush(struct draw_context *draw)
struct cell_command_vs *const vs = &cell_global.command[0].vs;
uint64_t *batch;
struct cell_array_info *array_info;
- struct cell_shader_info *shader_info;
unsigned i, j;
struct cell_attribute_fetch_code *cf;
@@ -123,12 +122,12 @@ cell_vertex_shader_queue_flush(struct draw_context *draw)
for (j = 0; j < n; j++) {
vs->elts[j] = draw->vs.queue[i + j].elt;
- vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].dest;
+ vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex;
}
for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) {
vs->elts[j] = vs->elts[0];
- vs->vOut[j] = vs->vOut[0];
+ vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex;
}
vs->num_elts = n;
@@ -137,5 +136,6 @@ cell_vertex_shader_queue_flush(struct draw_context *draw)
cell_flush_int(& cell->pipe, PIPE_FLUSH_WAIT);
}
+ draw->vs.post_nr = draw->vs.queue_nr;
draw->vs.queue_nr = 0;
}
diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c
index 698a5790bb0..a1701d80d18 100644
--- a/src/gallium/drivers/cell/spu/spu_dcache.c
+++ b/src/gallium/drivers/cell/spu/spu_dcache.c
@@ -33,7 +33,7 @@
#define CACHE_NAME data
#define CACHED_TYPE qword
#define CACHE_TYPE CACHE_TYPE_RO
-#define CACHE_SET_TAGID(set) TAG_VERTEX_BUFFER
+#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0)
#define CACHE_LOG2NNWAY 2
#define CACHE_LOG2NSETS 6
#include <cache-api.h>
@@ -49,43 +49,57 @@
/**
* Fetch an arbitrary number of bytes from an unaligned address
+ *
+ * \param dst Destination data buffer
+ * \param ea Main memory effective address of source data
+ * \param size Number of bytes to read
+ *
+ * \warning
+ * As is hinted by the type of the \c dst pointer, this function writes
+ * multiples of 16 bytes.
*/
void
spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size)
{
const int shift = ea & 0x0f;
- const unsigned aligned_start_ea = ea & ~0x0f;
- const unsigned aligned_end_ea = ROUNDUP16(ea + size);
- const unsigned num_entries = (aligned_end_ea - aligned_start_ea) / 16;
+ const unsigned read_size = ROUNDUP16(size + shift);
+ const unsigned last_read = ROUNDUP16(ea + size);
+ const qword *const last_write = dst + (ROUNDUP16(size) / 16);
unsigned i;
if (shift == 0) {
/* Data is already aligned. Fetch directly into the destination buffer.
*/
- for (i = 0; i < num_entries; i++) {
- dst[i] = cache_rd(data, ea + (i * 16));
+ for (i = 0; i < size; i += 16) {
+ *(dst++) = cache_rd(data, ea + i);
}
} else {
- qword tmp[2] ALIGN16_ATTRIB;
-
+ qword hi;
- tmp[0] = cache_rd(data, (ea & ~0x0f));
- for (i = 0; i < (num_entries & ~1); i++) {
- const unsigned curr = i & 1;
- const unsigned next = curr ^ 1;
- tmp[next] = cache_rd(data, (ea & ~0x0f) + (next * 16));
-
- dst[i] = si_or((qword) spu_slqwbyte(tmp[curr], shift),
- (qword) spu_rlmaskqwbyte(tmp[next], shift - 16));
+ /* Please exercise extreme caution when modifying this code. This code
+ * must not read past the end of the page containing the source data,
+ * and it must not write more than ((size + 15) / 16) qwords to the
+ * destination buffer.
+ */
+ ea &= ~0x0f;
+ hi = cache_rd(data, ea);
+ for (i = 16; i < read_size; i += 16) {
+ qword lo = cache_rd(data, ea + i);
+
+ *(dst++) = si_or((qword) spu_slqwbyte(hi, shift),
+ (qword) spu_rlmaskqwbyte(lo, shift - 16));
+ hi = lo;
}
- if (i < num_entries) {
- dst[i] = si_or((qword) spu_slqwbyte(tmp[(i & 1)], shift),
- si_il(0));
+ if (dst != last_write) {
+ *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), si_il(0));
}
}
+
+ ASSERT((ea + i) == last_read);
+ ASSERT(dst == last_write);
}
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index cf81bee8fde..1560c0f1574 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -50,8 +50,6 @@
* Brian Paul
*/
-#include <libmisc.h>
-#include <spu_mfcio.h>
#include <transpose_matrix4x4.h>
#include <simdmath/ceilf4.h>
#include <simdmath/cosf4.h>
@@ -151,6 +149,7 @@ spu_exec_machine_init(struct spu_exec_machine *mach,
const qword zero = si_il(0);
const qword not_zero = si_il(~0);
+ (void) numSamplers;
mach->Samplers = samplers;
mach->Processor = processor;
mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
@@ -659,9 +658,10 @@ fetch_texel( struct spu_sampler *sampler,
qword rgba[4];
qword out[4];
- sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, (float *) rgba);
+ sampler->get_samples(sampler, s->f, t->f, p->f, lodbias,
+ (float (*)[4]) rgba);
- _transpose_matrix4x4(out, rgba);
+ _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba);
r->q = out[0];
g->q = out[1];
b->q = out[2];
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index 1136dba62d5..cc4bafdb3ac 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -38,6 +38,7 @@
#include "spu_tile.h"
//#include "spu_test.h"
#include "spu_vertex_shader.h"
+#include "spu_dcache.h"
#include "cell/common.h"
#include "pipe/p_defines.h"
@@ -434,7 +435,7 @@ cmd_batch(uint opcode)
pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8);
break;
case CELL_CMD_STATE_UNIFORMS:
- draw.constants = (float (*)[4]) (uintptr_t) buffer[pos + 1];
+ draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1];
pos += 2;
break;
case CELL_CMD_STATE_VS_ARRAY_INFO:
@@ -583,7 +584,7 @@ main(main_param_t speid, main_param_t argp)
one_time_init();
if (Debug)
- printf("SPU: main() speid=%lu\n", speid);
+ printf("SPU: main() speid=%lu\n", (unsigned long) speid);
mfc_get(&spu.init, /* dest */
(unsigned int) argp, /* src */
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index 5c95d112ac1..d14f1abbe74 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -131,7 +131,10 @@ extern boolean Debug;
#define TAG_BATCH_BUFFER 17
#define TAG_MISC 18
#define TAG_TEXTURE_TILE 19
-#define TAG_INSTRUCTION_FETCH 20
+#define TAG_DCACHE0 20
+#define TAG_DCACHE1 21
+#define TAG_DCACHE2 22
+#define TAG_DCACHE3 23
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
index f7e4e653e31..219fd90cc0e 100644
--- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
+++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
@@ -32,8 +32,6 @@
* Ian Romanick <[email protected]>
*/
-#include <spu_mfcio.h>
-
#include "pipe/p_util.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c
index acfa3494397..c3955bbd2dd 100644
--- a/src/gallium/drivers/i915simple/i915_context.c
+++ b/src/gallium/drivers/i915simple/i915_context.c
@@ -298,10 +298,12 @@ struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys,
i915_init_string_functions(i915);
i915_init_texture_functions(i915);
+ draw_install_aaline_stage(i915->draw, &i915->pipe);
+ draw_install_aapoint_stage(i915->draw, &i915->pipe);
+
i915->pci_id = pci_id;
i915->flags.is_i945 = is_i945;
-
i915->dirty = ~0;
i915->hardware_dirty = ~0;
diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h
index 2d876925b2c..d32dded6bdc 100644
--- a/src/gallium/drivers/i915simple/i915_context.h
+++ b/src/gallium/drivers/i915simple/i915_context.h
@@ -79,6 +79,40 @@
#define I915_MAX_CONSTANT 32
+/** See constant_flags[] below */
+#define I915_CONSTFLAG_USER 0x1f
+
+
+/**
+ * Subclass of pipe_shader_state
+ */
+struct i915_fragment_shader
+{
+ struct pipe_shader_state state;
+ uint *program;
+ uint program_len;
+
+ /**
+ * constants introduced during translation.
+ * These are placed at the end of the constant buffer and grow toward
+ * the beginning (e.g. slots 31, 30, 29, ...)
+ * User-provided constants start at 0.
+ * This allows both types of constants to co-exist (until there's too many)
+ * and doesn't require regenerating/changing the fragment program to
+ * shuffle constants around.
+ */
+ uint num_constants;
+ float constants[I915_MAX_CONSTANT][4];
+
+ /**
+ * Status of each constant
+ * If I915_CONSTFLAG_USER, the value must be taken from the corresponding
+ * slot of the user's constant buffer. (set by pipe->set_constant_buffer())
+ * Else, the bitmask indicates which components are occupied by immediates.
+ */
+ ubyte constant_flags[I915_MAX_CONSTANT];
+};
+
struct i915_cache_context;
@@ -93,11 +127,6 @@ struct i915_state
float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4];
/** number of constants passed in through a constant buffer */
uint num_user_constants[PIPE_SHADER_TYPES];
- /** user constants, plus extra constants from shader translation */
- uint num_constants[PIPE_SHADER_TYPES];
-
- uint *program;
- uint program_len;
/* texture sampler state */
unsigned sampler[I915_TEX_UNITS][3];
@@ -187,7 +216,8 @@ struct i915_context
const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS];
const struct i915_depth_stencil_state *depth_stencil;
const struct i915_rasterizer_state *rasterizer;
- const struct pipe_shader_state *fs;
+
+ struct i915_fragment_shader *fs;
struct pipe_blend_color blend_color;
struct pipe_clip_state clip;
@@ -233,6 +263,7 @@ struct i915_context
#define I915_NEW_TEXTURE 0x800
#define I915_NEW_CONSTANTS 0x1000
#define I915_NEW_VBO 0x2000
+#define I915_NEW_VS 0x4000
/* Driver's internally generated state flags:
diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c
index 3c2069b8273..96a54281f11 100644
--- a/src/gallium/drivers/i915simple/i915_flush.c
+++ b/src/gallium/drivers/i915simple/i915_flush.c
@@ -31,6 +31,7 @@
#include "pipe/p_defines.h"
+#include "draw/draw_context.h"
#include "i915_context.h"
#include "i915_reg.h"
#include "i915_batch.h"
@@ -44,6 +45,8 @@ static void i915_flush( struct pipe_context *pipe,
{
struct i915_context *i915 = i915_context(pipe);
+ draw_flush(i915->draw);
+
/* Do we need to emit an MI_FLUSH command to flush the hardware
* caches?
*/
diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h
index 8c7b68aefb5..250dfe6dbf0 100644
--- a/src/gallium/drivers/i915simple/i915_fpc.h
+++ b/src/gallium/drivers/i915simple/i915_fpc.h
@@ -44,9 +44,16 @@
* Program translation state
*/
struct i915_fp_compile {
- const struct pipe_shader_state *shader;
+ struct i915_fragment_shader *shader; /* the shader we're compiling */
- struct vertex_info *vertex_info;
+ boolean used_constants[I915_MAX_CONSTANT];
+
+ /** maps TGSI immediate index to constant slot */
+ uint num_immediates;
+ uint immediates_map[I915_MAX_CONSTANT];
+ float immediates[I915_MAX_CONSTANT][4];
+
+ boolean first_instruction;
uint declarations[I915_PROGRAM_SIZE];
uint program[I915_PROGRAM_SIZE];
@@ -57,11 +64,6 @@ struct i915_fp_compile {
uint output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
uint output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
- /** points into the i915->current.constants array: */
- float (*constants)[4];
- uint num_constants;
- uint constant_flags[I915_MAX_CONSTANT]; /**< status of each constant */
-
uint *csr; /**< Cursor, points into program. */
uint *decl; /**< Cursor, points into declarations. */
@@ -155,7 +157,9 @@ swizzle(int reg, uint x, uint y, uint z, uint w)
/***********************************************************************
* Public interface for the compiler
*/
-extern void i915_translate_fragment_program( struct i915_context *i915 );
+extern void
+i915_translate_fragment_program( struct i915_context *i915,
+ struct i915_fragment_shader *fs);
@@ -206,8 +210,5 @@ extern void i915_disassemble_program(const uint * program, uint sz);
extern void
i915_program_error(struct i915_fp_compile *p, const char *msg, ...);
-extern void
-i915_translate_fragment_program(struct i915_context *i915);
-
#endif
diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c
index 74924ff0a1d..4bdeefb449b 100644
--- a/src/gallium/drivers/i915simple/i915_fpc_emit.c
+++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c
@@ -61,8 +61,6 @@
(REG_NR_MASK << UREG_NR_SHIFT))
-#define I915_CONSTFLAG_PARAM 0x1f
-
uint
i915_get_temp(struct i915_fp_compile *p)
{
@@ -73,10 +71,21 @@ i915_get_temp(struct i915_fp_compile *p)
}
p->temp_flag |= 1 << (bit - 1);
- return UREG(REG_TYPE_R, (bit - 1));
+ return bit - 1;
+}
+
+
+static void
+i915_release_temp(struct i915_fp_compile *p, int reg)
+{
+ p->temp_flag &= ~(1 << reg);
}
+/**
+ * Get unpreserved temporary, a temp whose value is not preserved between
+ * PS program phases.
+ */
uint
i915_get_utemp(struct i915_fp_compile * p)
{
@@ -185,41 +194,62 @@ i915_emit_arith(struct i915_fp_compile * p,
return dest;
}
+
+/**
+ * Emit a texture load or texkill instruction.
+ * \param dest the dest i915 register
+ * \param destmask the dest register writemask
+ * \param sampler the i915 sampler register
+ * \param coord the i915 source texcoord operand
+ * \param opcode the instruction opcode
+ */
uint i915_emit_texld( struct i915_fp_compile *p,
uint dest,
uint destmask,
uint sampler,
uint coord,
- uint op )
+ uint opcode )
{
- uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord));
+ const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord));
+ int temp = -1;
+
if (coord != k) {
- /* No real way to work around this in the general case - need to
- * allocate and declare a new temporary register (a utemp won't
- * do). Will fallback for now.
+ /* texcoord is swizzled or negated. Need to allocate a new temporary
+ * register (a utemp / unpreserved temp won't do).
*/
- i915_program_error(p, "Can't (yet) swizzle TEX arguments");
- assert(0);
- return 0;
+ uint tempReg;
+
+ temp = i915_get_temp(p); /* get temp reg index */
+ tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */
+
+ i915_emit_arith( p, A0_MOV,
+ tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */
+ 0, /* saturate */
+ coord, 0, 0 ); /* src0, src1, src2 */
+
+ /* new src texcoord is tempReg */
+ coord = tempReg;
}
/* Don't worry about saturate as we only support
*/
if (destmask != A0_DEST_CHANNEL_ALL) {
+ /* if not writing to XYZW... */
uint tmp = i915_get_utemp(p);
- i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op );
+ i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode );
i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 );
- return dest;
+ /* XXX release utemp here? */
}
else {
assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)));
+ /* is the sampler coord a texcoord input reg? */
if (GET_UREG_TYPE(coord) != REG_TYPE_T) {
p->nr_tex_indirect++;
}
- *(p->csr++) = (op |
+ *(p->csr++) = (opcode |
T0_DEST( dest ) |
T0_SAMPLER( sampler ));
@@ -227,14 +257,19 @@ uint i915_emit_texld( struct i915_fp_compile *p,
*(p->csr++) = T2_MBZ;
p->nr_tex_insn++;
- return dest;
}
+
+ if (temp >= 0)
+ i915_release_temp(p, temp);
+
+ return dest;
}
uint
i915_emit_const1f(struct i915_fp_compile * p, float c0)
{
+ struct i915_fragment_shader *ifs = p->shader;
unsigned reg, idx;
if (c0 == 0.0)
@@ -243,15 +278,15 @@ i915_emit_const1f(struct i915_fp_compile * p, float c0)
return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
- if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
+ if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
continue;
for (idx = 0; idx < 4; idx++) {
- if (!(p->constant_flags[reg] & (1 << idx)) ||
- p->constants[reg][idx] == c0) {
- p->constants[reg][idx] = c0;
- p->constant_flags[reg] |= 1 << idx;
- if (reg + 1 > p->num_constants)
- p->num_constants = reg + 1;
+ if (!(ifs->constant_flags[reg] & (1 << idx)) ||
+ ifs->constants[reg][idx] == c0) {
+ ifs->constants[reg][idx] = c0;
+ ifs->constant_flags[reg] |= 1 << idx;
+ if (reg + 1 > ifs->num_constants)
+ ifs->num_constants = reg + 1;
return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
}
}
@@ -264,6 +299,7 @@ i915_emit_const1f(struct i915_fp_compile * p, float c0)
uint
i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1)
{
+ struct i915_fragment_shader *ifs = p->shader;
unsigned reg, idx;
if (c0 == 0.0)
@@ -277,16 +313,16 @@ i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1)
return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
- if (p->constant_flags[reg] == 0xf ||
- p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
+ if (ifs->constant_flags[reg] == 0xf ||
+ ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
continue;
for (idx = 0; idx < 3; idx++) {
- if (!(p->constant_flags[reg] & (3 << idx))) {
- p->constants[reg][idx + 0] = c0;
- p->constants[reg][idx + 1] = c1;
- p->constant_flags[reg] |= 3 << idx;
- if (reg + 1 > p->num_constants)
- p->num_constants = reg + 1;
+ if (!(ifs->constant_flags[reg] & (3 << idx))) {
+ ifs->constants[reg][idx + 0] = c0;
+ ifs->constants[reg][idx + 1] = c1;
+ ifs->constant_flags[reg] |= 3 << idx;
+ if (reg + 1 > ifs->num_constants)
+ ifs->num_constants = reg + 1;
return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE);
}
}
@@ -302,25 +338,26 @@ uint
i915_emit_const4f(struct i915_fp_compile * p,
float c0, float c1, float c2, float c3)
{
+ struct i915_fragment_shader *ifs = p->shader;
unsigned reg;
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
- if (p->constant_flags[reg] == 0xf &&
- p->constants[reg][0] == c0 &&
- p->constants[reg][1] == c1 &&
- p->constants[reg][2] == c2 &&
- p->constants[reg][3] == c3) {
+ if (ifs->constant_flags[reg] == 0xf &&
+ ifs->constants[reg][0] == c0 &&
+ ifs->constants[reg][1] == c1 &&
+ ifs->constants[reg][2] == c2 &&
+ ifs->constants[reg][3] == c3) {
return UREG(REG_TYPE_CONST, reg);
}
- else if (p->constant_flags[reg] == 0) {
-
- p->constants[reg][0] = c0;
- p->constants[reg][1] = c1;
- p->constants[reg][2] = c2;
- p->constants[reg][3] = c3;
- p->constant_flags[reg] = 0xf;
- if (reg + 1 > p->num_constants)
- p->num_constants = reg + 1;
+ else if (ifs->constant_flags[reg] == 0) {
+
+ ifs->constants[reg][0] = c0;
+ ifs->constants[reg][1] = c1;
+ ifs->constants[reg][2] = c2;
+ ifs->constants[reg][3] = c3;
+ ifs->constant_flags[reg] = 0xf;
+ if (reg + 1 > ifs->num_constants)
+ ifs->num_constants = reg + 1;
return UREG(REG_TYPE_CONST, reg);
}
}
@@ -335,41 +372,3 @@ i915_emit_const4fv(struct i915_fp_compile * p, const float * c)
{
return i915_emit_const4f(p, c[0], c[1], c[2], c[3]);
}
-
-
-#if 00000/*UNUSED*/
-/* Reserve a slot in the constant file for a Mesa state parameter.
- * These will later need to be tracked on statechanges, but that is
- * done elsewhere.
- */
-uint
-i915_emit_param4fv(struct i915_fp_compile * p, const float * values)
-{
- struct i915_fragment_program *fp = p->fp;
- int i;
-
- for (i = 0; i < fp->nr_params; i++) {
- if (fp->param[i].values == values)
- return UREG(REG_TYPE_CONST, fp->param[i].reg);
- }
-
- if (p->constants->nr_constants == I915_MAX_CONSTANT ||
- fp->nr_params == I915_MAX_CONSTANT) {
- i915_program_error(p, "i915_emit_param4fv: out of constants\n");
- return 0;
- }
-
- {
- int reg = p->constants->nr_constants++;
- int i = fp->nr_params++;
-
- assert (p->constant_flags[reg] == 0);
- p->constant_flags[reg] = I915_CONSTFLAG_PARAM;
-
- fp->param[i].values = values;
- fp->param[i].reg = reg;
-
- return UREG(REG_TYPE_CONST, reg);
- }
-}
-#endif
diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c
index 6c1524c768e..76a2184e9ab 100644
--- a/src/gallium/drivers/i915simple/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c
@@ -34,6 +34,7 @@
#include "pipe/p_shader_tokens.h"
#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_dump.h"
#include "draw/draw_vertex.h"
@@ -97,19 +98,19 @@ negate(int reg, int x, int y, int z, int w)
}
+/**
+ * In the event of a translation failure, we'll generate a simple color
+ * pass-through program.
+ */
static void
-i915_use_passthrough_shader(struct i915_context *i915)
+i915_use_passthrough_shader(struct i915_fragment_shader *fs)
{
- debug_printf("**** Using i915 pass-through fragment shader\n");
-
- i915->current.program = (uint *) MALLOC(sizeof(passthrough));
- if (i915->current.program) {
- memcpy(i915->current.program, passthrough, sizeof(passthrough));
- i915->current.program_len = Elements(passthrough);
+ fs->program = (uint *) MALLOC(sizeof(passthrough));
+ if (fs->program) {
+ memcpy(fs->program, passthrough, sizeof(passthrough));
+ fs->program_len = Elements(passthrough);
}
-
- i915->current.num_constants[PIPE_SHADER_FRAGMENT] = 0;
- i915->current.num_user_constants[PIPE_SHADER_FRAGMENT] = 0;
+ fs->num_constants = 0;
}
@@ -161,9 +162,6 @@ src_vector(struct i915_fp_compile *p,
* We also use a texture coordinate to pass wpos when possible.
*/
- /* use vertex format info to map a slot number to a VF attrib */
- assert(index < p->vertex_info->num_attribs);
-
sem_name = p->input_semantic_name[index];
sem_ind = p->input_semantic_index[index];
@@ -201,7 +199,8 @@ src_vector(struct i915_fp_compile *p,
break;
case TGSI_FILE_IMMEDIATE:
- /* XXX unfinished - need to append immediates onto const buffer */
+ assert(index < p->num_immediates);
+ index = p->immediates_map[index];
/* fall-through */
case TGSI_FILE_CONSTANT:
src = UREG(REG_TYPE_CONST, index);
@@ -386,6 +385,26 @@ emit_simple_arith(struct i915_fp_compile *p,
arg3 );
}
+
+/** As above, but swap the first two src regs */
+static void
+emit_simple_arith_swap2(struct i915_fp_compile *p,
+ const struct tgsi_full_instruction *inst,
+ uint opcode, uint numArgs)
+{
+ struct tgsi_full_instruction inst2;
+
+ assert(numArgs == 2);
+
+ /* transpose first two registers */
+ inst2 = *inst;
+ inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1];
+ inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0];
+
+ emit_simple_arith(p, &inst2, opcode, numArgs);
+}
+
+
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
@@ -556,8 +575,12 @@ i915_translate_instruction(struct i915_fp_compile *p,
src0 = src_vector(p, &inst->FullSrcRegisters[0]);
tmp = i915_get_utemp(p);
- i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
- 0, src0, T0_TEXKILL);
+ i915_emit_texld(p,
+ tmp, /* dest reg: a dummy reg */
+ A0_DEST_CHANNEL_ALL, /* dest writemask */
+ 0, /* sampler */
+ src0, /* coord*/
+ T0_TEXKILL); /* opcode */
break;
case TGSI_OPCODE_LG2:
@@ -773,6 +796,11 @@ i915_translate_instruction(struct i915_fp_compile *p,
emit_simple_arith(p, inst, A0_SGE, 2);
break;
+ case TGSI_OPCODE_SLE:
+ /* like SGE, but swap reg0, reg1 */
+ emit_simple_arith_swap2(p, inst, A0_SGE, 2);
+ break;
+
case TGSI_OPCODE_SIN:
src0 = src_vector(p, &inst->FullSrcRegisters[0]);
tmp = i915_get_utemp(p);
@@ -827,6 +855,11 @@ i915_translate_instruction(struct i915_fp_compile *p,
emit_simple_arith(p, inst, A0_SLT, 2);
break;
+ case TGSI_OPCODE_SGT:
+ /* like SLT, but swap reg0, reg1 */
+ emit_simple_arith_swap2(p, inst, A0_SLT, 2);
+ break;
+
case TGSI_OPCODE_SUB:
src0 = src_vector(p, &inst->FullSrcRegisters[0]);
src1 = src_vector(p, &inst->FullSrcRegisters[1]);
@@ -880,6 +913,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
default:
i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
+ p->error = 1;
return;
}
@@ -896,6 +930,7 @@ static void
i915_translate_instructions(struct i915_fp_compile *p,
const struct tgsi_token *tokens)
{
+ struct i915_fragment_shader *ifs = p->shader;
struct tgsi_parse_context parse;
tgsi_parse_init( &parse, tokens );
@@ -928,13 +963,64 @@ i915_translate_instructions(struct i915_fp_compile *p,
p->output_semantic_name[ind] = sem;
p->output_semantic_index[ind] = semi;
}
+ else if (parse.FullToken.FullDeclaration.Declaration.File
+ == TGSI_FILE_CONSTANT) {
+ uint i;
+ for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First;
+ i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last;
+ i++) {
+ assert(ifs->constant_flags[i] == 0x0);
+ ifs->constant_flags[i] = I915_CONSTFLAG_USER;
+ ifs->num_constants = MAX2(ifs->num_constants, i + 1);
+ }
+ }
+ else if (parse.FullToken.FullDeclaration.Declaration.File
+ == TGSI_FILE_TEMPORARY) {
+ uint i;
+ for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First;
+ i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last;
+ i++) {
+ assert(i < I915_MAX_TEMPORARY);
+ p->temp_flag |= (1 << i); /* mark temp as used */
+ }
+ }
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
- /* XXX append the immediate to the const buffer... */
+ {
+ const struct tgsi_full_immediate *imm
+ = &parse.FullToken.FullImmediate;
+ const uint pos = p->num_immediates++;
+ uint j;
+ for (j = 0; j < imm->Immediate.Size; j++) {
+ p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float;
+ }
+ }
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (p->first_instruction) {
+ /* resolve location of immediates */
+ uint i, j;
+ for (i = 0; i < p->num_immediates; i++) {
+ /* find constant slot for this immediate */
+ for (j = 0; j < I915_MAX_CONSTANT; j++) {
+ if (ifs->constant_flags[j] == 0x0) {
+ memcpy(ifs->constants[j],
+ p->immediates[i],
+ 4 * sizeof(float));
+ /*printf("immediate %d maps to const %d\n", i, j);*/
+ ifs->constant_flags[j] = 0xf; /* all four comps used */
+ p->immediates_map[i] = j;
+ ifs->num_constants = MAX2(ifs->num_constants, j + 1);
+ break;
+ }
+ }
+ }
+
+ p->first_instruction = FALSE;
+ }
+
i915_translate_instruction(p, &parse.FullToken.FullInstruction);
break;
@@ -950,32 +1036,33 @@ i915_translate_instructions(struct i915_fp_compile *p,
static struct i915_fp_compile *
i915_init_compile(struct i915_context *i915,
- const struct pipe_shader_state *fs)
+ struct i915_fragment_shader *ifs)
{
struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
- p->shader = i915->fs;
+ p->shader = ifs;
- p->vertex_info = &i915->current.vertex_info;
-
- /* new constants found during translation get appended after the
- * user-provided constants.
+ /* Put new constants at end of const buffer, growing downward.
+ * The problem is we don't know how many user-defined constants might
+ * be specified with pipe->set_constant_buffer().
+ * Should pre-scan the user's program to determine the highest-numbered
+ * constant referenced.
*/
- p->constants = i915->current.constants[PIPE_SHADER_FRAGMENT];
- p->num_constants = i915->current.num_user_constants[PIPE_SHADER_FRAGMENT];
+ ifs->num_constants = 0;
+ memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
+
+ p->first_instruction = TRUE;
p->nr_tex_indirect = 1; /* correct? */
p->nr_tex_insn = 0;
p->nr_alu_insn = 0;
p->nr_decl_insn = 0;
- memset(p->constant_flags, 0, sizeof(p->constant_flags));
-
p->csr = p->program;
p->decl = p->declarations;
p->decl_s = 0;
p->decl_t = 0;
- p->temp_flag = 0xffff000;
+ p->temp_flag = ~0x0 << I915_MAX_TEMPORARY;
p->utemp_flag = ~0x7;
p->wpos_tex = -1;
@@ -993,6 +1080,7 @@ i915_init_compile(struct i915_context *i915,
static void
i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
{
+ struct i915_fragment_shader *ifs = p->shader;
unsigned long program_size = (unsigned long) (p->csr - p->program);
unsigned long decl_size = (unsigned long) (p->decl - p->declarations);
@@ -1008,19 +1096,13 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
if (p->nr_decl_insn > I915_MAX_DECL_INSN)
i915_program_error(p, "Exceeded max DECL instructions");
- /* free old program, if present */
- if (i915->current.program) {
- FREE(i915->current.program);
- i915->current.program_len = 0;
- }
-
if (p->error) {
p->NumNativeInstructions = 0;
p->NumNativeAluInstructions = 0;
p->NumNativeTexInstructions = 0;
p->NumNativeTexIndirections = 0;
- i915_use_passthrough_shader(i915);
+ i915_use_passthrough_shader(ifs);
}
else {
p->NumNativeInstructions
@@ -1034,24 +1116,20 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
/* Copy compilation results to fragment program struct:
*/
- i915->current.program
+ assert(!ifs->program);
+ ifs->program
= (uint *) MALLOC((program_size + decl_size) * sizeof(uint));
- if (i915->current.program) {
- i915->current.program_len = program_size + decl_size;
+ if (ifs->program) {
+ ifs->program_len = program_size + decl_size;
- memcpy(i915->current.program,
+ memcpy(ifs->program,
p->declarations,
decl_size * sizeof(uint));
- memcpy(i915->current.program + decl_size,
+ memcpy(ifs->program + decl_size,
p->program,
program_size * sizeof(uint));
}
-
- /* update number of constants */
- i915->current.num_constants[PIPE_SHADER_FRAGMENT] = p->num_constants;
- assert(i915->current.num_constants[PIPE_SHADER_FRAGMENT]
- >= i915->current.num_user_constants[PIPE_SHADER_FRAGMENT]);
}
/* Release the compilation struct:
@@ -1085,7 +1163,7 @@ i915_find_wpos_space(struct i915_fp_compile *p)
i915_program_error(p, "No free texcoord for wpos value");
}
#else
- if (p->shader->input_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
+ if (p->shader->state.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
/* frag shader using the fragment position input */
#if 0
assert(0);
@@ -1106,7 +1184,7 @@ static void
i915_fixup_depth_write(struct i915_fp_compile *p)
{
/* XXX assuming pos/depth is always in output[0] */
- if (p->shader->output_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
+ if (p->shader->state.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
const uint depth = UREG(REG_TYPE_OD, 0);
i915_emit_arith(p,
@@ -1121,13 +1199,18 @@ i915_fixup_depth_write(struct i915_fp_compile *p)
void
-i915_translate_fragment_program( struct i915_context *i915 )
+i915_translate_fragment_program( struct i915_context *i915,
+ struct i915_fragment_shader *fs)
{
- struct i915_fp_compile *p = i915_init_compile(i915, i915->fs);
- const struct tgsi_token *tokens = i915->fs->tokens;
+ struct i915_fp_compile *p = i915_init_compile(i915, fs);
+ const struct tgsi_token *tokens = fs->state.tokens;
i915_find_wpos_space(p);
+#if 0
+ tgsi_dump(tokens, 0);
+#endif
+
i915_translate_instructions(p, tokens);
i915_fixup_depth_write(p);
diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c
index 44c43259369..d8de5178f60 100644
--- a/src/gallium/drivers/i915simple/i915_prim_emit.c
+++ b/src/gallium/drivers/i915simple/i915_prim_emit.c
@@ -72,38 +72,42 @@ emit_hw_vertex( struct i915_context *i915,
uint i;
uint count = 0; /* for debug/sanity */
+ assert(!i915->dirty);
+
for (i = 0; i < vinfo->num_attribs; i++) {
+ const uint j = vinfo->src_index[i];
+ const float *attrib = vertex->data[j];
switch (vinfo->emit[i]) {
case EMIT_OMIT:
/* no-op */
break;
case EMIT_1F:
- OUT_BATCH( fui(vertex->data[i][0]) );
+ OUT_BATCH( fui(attrib[0]) );
count++;
break;
case EMIT_2F:
- OUT_BATCH( fui(vertex->data[i][0]) );
- OUT_BATCH( fui(vertex->data[i][1]) );
+ OUT_BATCH( fui(attrib[0]) );
+ OUT_BATCH( fui(attrib[1]) );
count += 2;
break;
case EMIT_3F:
- OUT_BATCH( fui(vertex->data[i][0]) );
- OUT_BATCH( fui(vertex->data[i][1]) );
- OUT_BATCH( fui(vertex->data[i][2]) );
+ OUT_BATCH( fui(attrib[0]) );
+ OUT_BATCH( fui(attrib[1]) );
+ OUT_BATCH( fui(attrib[2]) );
count += 3;
break;
case EMIT_4F:
- OUT_BATCH( fui(vertex->data[i][0]) );
- OUT_BATCH( fui(vertex->data[i][1]) );
- OUT_BATCH( fui(vertex->data[i][2]) );
- OUT_BATCH( fui(vertex->data[i][3]) );
+ OUT_BATCH( fui(attrib[0]) );
+ OUT_BATCH( fui(attrib[1]) );
+ OUT_BATCH( fui(attrib[2]) );
+ OUT_BATCH( fui(attrib[3]) );
count += 4;
break;
case EMIT_4UB:
- OUT_BATCH( pack_ub4(float_to_ubyte( vertex->data[i][2] ),
- float_to_ubyte( vertex->data[i][1] ),
- float_to_ubyte( vertex->data[i][0] ),
- float_to_ubyte( vertex->data[i][3] )) );
+ OUT_BATCH( pack_ub4(float_to_ubyte( attrib[2] ),
+ float_to_ubyte( attrib[1] ),
+ float_to_ubyte( attrib[0] ),
+ float_to_ubyte( attrib[3] )) );
count += 1;
break;
default:
@@ -122,17 +126,19 @@ emit_prim( struct draw_stage *stage,
unsigned nr )
{
struct i915_context *i915 = setup_stage(stage)->i915;
- unsigned vertex_size = i915->current.vertex_info.size * 4; /* in bytes */
+ unsigned vertex_size;
unsigned i;
- assert(vertex_size >= 12); /* never smaller than 12 bytes */
-
if (i915->dirty)
i915_update_derived( i915 );
if (i915->hardware_dirty)
i915_emit_hardware_state( i915 );
+ /* need to do this after validation! */
+ vertex_size = i915->current.vertex_info.size * 4; /* in bytes */
+ assert(vertex_size >= 12); /* never smaller than 12 bytes */
+
if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) {
FLUSH_BATCH();
diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
index c5bf6174f68..9d5f609220a 100644
--- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
@@ -83,6 +83,12 @@ i915_vbuf_render_get_vertex_info( struct vbuf_render *render )
{
struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
struct i915_context *i915 = i915_render->i915;
+
+ if (i915->dirty) {
+ /* make sure we have up to date vertex layout */
+ i915_update_derived( i915 );
+ }
+
return &i915->current.vertex_info;
}
@@ -143,7 +149,8 @@ i915_vbuf_render_draw( struct vbuf_render *render,
assert(nr_indices);
- assert((i915->dirty & ~I915_NEW_VBO) == 0);
+ /* this seems to be bogus, since we validate state right after this */
+ /*assert((i915->dirty & ~I915_NEW_VBO) == 0);*/
if (i915->dirty)
i915_update_derived( i915 );
diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c
index e055eed7e02..a35bdf941fc 100644
--- a/src/gallium/drivers/i915simple/i915_state.c
+++ b/src/gallium/drivers/i915simple/i915_state.c
@@ -38,6 +38,7 @@
#include "i915_reg.h"
#include "i915_state.h"
#include "i915_state_inlines.h"
+#include "i915_fpc.h"
/* The i915 (and related graphics cores) do not support GL_CLAMP. The
@@ -416,26 +417,47 @@ static void i915_set_polygon_stipple( struct pipe_context *pipe,
}
-static void * i915_create_fs_state(struct pipe_context *pipe,
- const struct pipe_shader_state *templ)
+
+static void *
+i915_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
{
- return 0;
+ struct i915_context *i915 = i915_context(pipe);
+ struct i915_fragment_shader *ifs = CALLOC_STRUCT(i915_fragment_shader);
+ if (!ifs)
+ return NULL;
+
+ ifs->state = *templ;
+
+ /* The shader's compiled to i915 instructions here */
+ i915_translate_fragment_program(i915, ifs);
+
+ return ifs;
}
-static void i915_bind_fs_state(struct pipe_context *pipe, void *fs)
+static void
+i915_bind_fs_state(struct pipe_context *pipe, void *shader)
{
struct i915_context *i915 = i915_context(pipe);
- i915->fs = (struct pipe_shader_state *)fs;
+ i915->fs = (struct i915_fragment_shader*) shader;
i915->dirty |= I915_NEW_FS;
}
-static void i915_delete_fs_state(struct pipe_context *pipe, void *shader)
+static
+void i915_delete_fs_state(struct pipe_context *pipe, void *shader)
{
- /*do nothing*/
+ struct i915_fragment_shader *ifs = (struct i915_fragment_shader *) shader;
+
+ if (ifs->program)
+ FREE(ifs->program);
+ ifs->program_len = 0;
+
+ FREE(ifs);
}
+
static void *
i915_create_vs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
@@ -452,6 +474,8 @@ static void i915_bind_vs_state(struct pipe_context *pipe, void *shader)
/* just pass-through to draw module */
draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader);
+
+ i915->dirty |= I915_NEW_VS;
}
static void i915_delete_vs_state(struct pipe_context *pipe, void *shader)
diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c
index 4767584fc60..5cf70acdf3b 100644
--- a/src/gallium/drivers/i915simple/i915_state_derived.c
+++ b/src/gallium/drivers/i915simple/i915_state_derived.c
@@ -27,104 +27,111 @@
#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
#include "draw/draw_vertex.h"
#include "i915_context.h"
#include "i915_state.h"
#include "i915_reg.h"
#include "i915_fpc.h"
-#include "pipe/p_shader_tokens.h"
+
/**
- * Determine which post-transform / pre-rasterization vertex attributes
- * we need.
- * Derived from: fs, setup states.
+ * Determine the hardware vertex layout.
+ * Depends on vertex/fragment shader state.
*/
static void calculate_vertex_layout( struct i915_context *i915 )
{
- const struct pipe_shader_state *fs = i915->fs;
+ const struct pipe_shader_state *fs = &i915->fs->state;
const enum interp_mode colorInterp = i915->rasterizer->color_interp;
struct vertex_info vinfo;
- uint front0 = 0, back0 = 0, front1 = 0, back1 = 0;
- boolean needW = 0;
+ boolean texCoords[8], colors[2], fog, needW;
uint i;
- boolean texCoords[8];
- uint src = 0;
+ int src;
memset(texCoords, 0, sizeof(texCoords));
+ colors[0] = colors[1] = fog = needW = FALSE;
memset(&vinfo, 0, sizeof(vinfo));
- /* pos */
- draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src++);
- /* Note: we'll set the S4_VFMT_XYZ[W] bits below */
-
+ /* Determine which fragment program inputs are needed. Setup HW vertex
+ * layout below, in the HW-specific attribute order.
+ */
for (i = 0; i < fs->num_inputs; i++) {
switch (fs->input_semantic_name[i]) {
case TGSI_SEMANTIC_POSITION:
break;
case TGSI_SEMANTIC_COLOR:
- if (fs->input_semantic_index[i] == 0) {
- front0 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++);
- vinfo.hwfmt[0] |= S4_VFMT_COLOR;
- }
- else {
- assert(fs->input_semantic_index[i] == 1);
- front1 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++);
- vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG;
- }
+ assert(fs->input_semantic_index[i] < 2);
+ colors[fs->input_semantic_index[i]] = TRUE;
break;
case TGSI_SEMANTIC_GENERIC:
/* usually a texcoord */
{
const uint unit = fs->input_semantic_index[i];
- uint hwtc;
+ assert(unit < 8);
texCoords[unit] = TRUE;
- draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++);
- hwtc = TEXCOORDFMT_4D;
needW = TRUE;
- vinfo.hwfmt[1] |= hwtc << (unit * 4);
}
break;
case TGSI_SEMANTIC_FOG:
- debug_printf("i915 fogcoord not implemented yet\n");
- draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src++);
+ fog = TRUE;
break;
default:
assert(0);
}
-
}
- /* finish up texcoord fields */
- for (i = 0; i < 8; i++) {
- if (!texCoords[i]) {
- const uint hwtc = TEXCOORDFMT_NOT_PRESENT;
- vinfo.hwfmt[1] |= hwtc << (i* 4);
- }
- }
-
- /* go back and fill in the vertex position info now that we have needW */
+
+ /* pos */
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
if (needW) {
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src);
vinfo.hwfmt[0] |= S4_VFMT_XYZW;
vinfo.emit[0] = EMIT_4F;
}
else {
+ draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src);
vinfo.hwfmt[0] |= S4_VFMT_XYZ;
vinfo.emit[0] = EMIT_3F;
}
- /* Additional attributes required for setup: Just twosided
- * lighting. Edgeflag is dealt with specially by setting bits in
- * the vertex header.
- */
- if (i915->rasterizer->light_twoside) {
- if (front0) {
- back0 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++);
+ /* hardware point size */
+ /* XXX todo */
+
+ /* primary color */
+ if (colors[0]) {
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0);
+ draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
+ vinfo.hwfmt[0] |= S4_VFMT_COLOR;
+ }
+
+ /* secondary color */
+ if (colors[1]) {
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1);
+ draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
+ vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG;
+ }
+
+ /* fog coord, not fog blend factor */
+ if (fog) {
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0);
+ draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
+ vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM;
+ }
+
+ /* texcoords */
+ for (i = 0; i < 8; i++) {
+ uint hwtc;
+ if (texCoords[i]) {
+ hwtc = TEXCOORDFMT_4D;
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i);
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
}
- if (back0) {
- back1 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++);
+ else {
+ hwtc = TEXCOORDFMT_NOT_PRESENT;
}
+ vinfo.hwfmt[1] |= hwtc << (i * 4);
}
draw_compute_vertex_size(&vinfo);
@@ -148,7 +155,7 @@ static void calculate_vertex_layout( struct i915_context *i915 )
*/
void i915_update_derived( struct i915_context *i915 )
{
- if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS))
+ if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS))
calculate_vertex_layout( i915 );
if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_TEXTURE))
@@ -164,7 +171,6 @@ void i915_update_derived( struct i915_context *i915 )
i915_update_dynamic( i915 );
if (i915->dirty & I915_NEW_FS) {
- i915_translate_fragment_program(i915);
i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */
}
diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c
index 3339287f498..6bbaac4e34c 100644
--- a/src/gallium/drivers/i915simple/i915_state_emit.c
+++ b/src/gallium/drivers/i915simple/i915_state_emit.c
@@ -99,7 +99,11 @@ i915_emit_hardware_state(struct i915_context *i915 )
2 + I915_TEX_UNITS*3 +
2 + I915_TEX_UNITS*3 +
2 + I915_MAX_CONSTANT*4 +
+#if 0
i915->current.program_len +
+#else
+ i915->fs->program_len +
+#endif
6
) * 3/2; /* plus 50% margin */
const unsigned relocs = ( I915_TEX_UNITS +
@@ -325,15 +329,34 @@ i915_emit_hardware_state(struct i915_context *i915 )
/* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */
if (i915->hardware_dirty & I915_HW_PROGRAM)
{
- const uint nr = i915->current.num_constants[PIPE_SHADER_FRAGMENT];
- assert(nr <= I915_MAX_CONSTANT);
- if (nr > 0) {
- const uint *c
- = (const uint *) i915->current.constants[PIPE_SHADER_FRAGMENT];
+ /* Collate the user-defined constants with the fragment shader's
+ * immediates according to the constant_flags[] array.
+ */
+ const uint nr = i915->fs->num_constants;
+ if (nr) {
uint i;
+
OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) );
OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) );
+
for (i = 0; i < nr; i++) {
+ const uint *c;
+ if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
+ /* grab user-defined constant */
+ c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i];
+ }
+ else {
+ /* emit program constant */
+ c = (uint *) i915->fs->constants[i];
+ }
+#if 0 /* debug */
+ {
+ float *f = (float *) c;
+ printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
+ (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
+ ? "user" : "immediate"));
+ }
+#endif
OUT_BATCH(*c++);
OUT_BATCH(*c++);
OUT_BATCH(*c++);
@@ -348,9 +371,9 @@ i915_emit_hardware_state(struct i915_context *i915 )
{
uint i;
/* we should always have, at least, a pass-through program */
- assert(i915->current.program_len > 0);
- for (i = 0; i < i915->current.program_len; i++) {
- OUT_BATCH(i915->current.program[i]);
+ assert(i915->fs->program_len > 0);
+ for (i = 0; i < i915->fs->program_len; i++) {
+ OUT_BATCH(i915->fs->program[i]);
}
}
diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript
index d581ee8d3ca..4c1a6d5df0b 100644
--- a/src/gallium/drivers/softpipe/SConscript
+++ b/src/gallium/drivers/softpipe/SConscript
@@ -5,6 +5,9 @@ env = env.Clone()
softpipe = env.ConvenienceLibrary(
target = 'softpipe',
source = [
+ 'sp_fs_exec.c',
+ 'sp_fs_sse.c',
+ 'sp_fs_llvm.c',
'sp_clear.c',
'sp_context.c',
'sp_draw_arrays.c',
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 8cb0534342d..d5bd7a702f1 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -81,7 +81,7 @@ sp_setup_pos_vector(const struct tgsi_interp_coef *coef,
static void
-exec_prepare( struct sp_fragment_shader *base,
+exec_prepare( const struct sp_fragment_shader *base,
struct tgsi_exec_machine *machine,
struct tgsi_sampler *samplers )
{
@@ -98,7 +98,7 @@ exec_prepare( struct sp_fragment_shader *base,
* interface:
*/
static unsigned
-exec_run( struct sp_fragment_shader *base,
+exec_run( const struct sp_fragment_shader *base,
struct tgsi_exec_machine *machine,
struct quad_header *quad )
{
diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c
index 22da4714533..34b2b7d4e24 100644
--- a/src/gallium/drivers/softpipe/sp_fs_llvm.c
+++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c
@@ -146,7 +146,7 @@ shade_quad_llvm(struct quad_stage *qs,
unsigned
-run_llvm_fs( struct sp_fragment_shader *base,
+run_llvm_fs( const struct sp_fragment_shader *base,
struct foo *machine )
{
}
diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c
index 7b1e131ee14..b6a3fddb29c 100644
--- a/src/gallium/drivers/softpipe/sp_prim_setup.c
+++ b/src/gallium/drivers/softpipe/sp_prim_setup.c
@@ -1165,6 +1165,10 @@ static void setup_begin( struct draw_stage *stage )
struct softpipe_context *sp = setup->softpipe;
const struct pipe_shader_state *fs = &setup->softpipe->fs->shader;
+ if (sp->dirty) {
+ softpipe_update_derived(sp);
+ }
+
setup->quad.nr_attrs = fs->num_inputs;
sp->quad.first->begin(sp->quad.first);
diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c
index 15b5594547d..142dbcc7710 100644
--- a/src/gallium/drivers/softpipe/sp_quad.c
+++ b/src/gallium/drivers/softpipe/sp_quad.c
@@ -56,11 +56,12 @@ sp_build_depth_stencil(
void
sp_build_quad_pipeline(struct softpipe_context *sp)
{
- boolean early_depth_test =
+ boolean early_depth_test =
sp->depth_stencil->depth.enabled &&
sp->framebuffer.zsbuf &&
!sp->depth_stencil->alpha.enabled &&
- sp->fs->shader.output_semantic_name[0] != TGSI_SEMANTIC_POSITION;
+ !sp->fs->uses_kill &&
+ !sp->fs->writes_z;
/* build up the pipeline in reverse order... */
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index ef8cf67d4c3..5aaa9e346bc 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -63,14 +63,17 @@ struct tgsi_exec_machine;
struct sp_fragment_shader {
struct pipe_shader_state shader;
- void (*prepare)( struct sp_fragment_shader *shader,
+ boolean uses_kill;
+ boolean writes_z;
+
+ void (*prepare)( const struct sp_fragment_shader *shader,
struct tgsi_exec_machine *machine,
struct tgsi_sampler *samplers);
/* Run the shader - this interface will get cleaned up in the
* future:
*/
- unsigned (*run)( struct sp_fragment_shader *shader,
+ unsigned (*run)( const struct sp_fragment_shader *shader,
struct tgsi_exec_machine *machine,
struct quad_header *quad );
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index f9f2c5eaa8f..4c6313001f4 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -35,33 +35,6 @@
/**
- * Search vertex program's outputs to find a match for the given
- * semantic name/index. Return the index of the output slot.
- *
- * Return 0 if not found. This will cause the fragment program to use
- * vertex attrib 0 (position) in the cases where the fragment program
- * attempts to use a missing vertex program output. This is an undefined
- * condition that users shouldn't hit anyway.
- */
-static int
-find_vs_output(struct softpipe_context *sp,
- const struct pipe_shader_state *vs,
- uint semantic_name,
- uint semantic_index)
-{
- uint i;
- for (i = 0; i < vs->num_outputs; i++) {
- if (vs->output_semantic_name[i] == semantic_name &&
- vs->output_semantic_index[i] == semantic_index)
- return i;
- }
-
- /* See if the draw module is introducing a new attribute... */
- return draw_find_vs_output(sp->draw, semantic_name, semantic_index);
-}
-
-
-/**
* Mark the current vertex layout as "invalid".
* We'll validate the vertex layout later, when we start to actually
* render a point or line or tri.
@@ -114,24 +87,25 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
int src;
switch (fs->input_semantic_name[i]) {
case TGSI_SEMANTIC_POSITION:
- src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_POSITION, 0);
+ src = draw_find_vs_output(softpipe->draw,
+ TGSI_SEMANTIC_POSITION, 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
break;
case TGSI_SEMANTIC_COLOR:
- src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_COLOR,
+ src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_COLOR,
fs->input_semantic_index[i]);
draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
break;
case TGSI_SEMANTIC_FOG:
- src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_FOG, 0);
+ src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
break;
case TGSI_SEMANTIC_GENERIC:
/* this includes texcoords and varying vars */
- src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_GENERIC,
+ src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC,
fs->input_semantic_index[i]);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
break;
@@ -141,7 +115,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
}
}
- softpipe->psize_slot = find_vs_output(softpipe, vs, TGSI_SEMANTIC_PSIZE, 0);
+ softpipe->psize_slot = draw_find_vs_output(softpipe->draw,
+ TGSI_SEMANTIC_PSIZE, 0);
if (softpipe->psize_slot > 0) {
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT,
softpipe->psize_slot);
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index b0238f81737..b184ac61bb9 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -36,6 +36,7 @@
#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
#include "tgsi/util/tgsi_dump.h"
+#include "tgsi/util/tgsi_scan.h"
void *
@@ -44,21 +45,24 @@ softpipe_create_fs_state(struct pipe_context *pipe,
{
struct softpipe_context *softpipe = softpipe_context(pipe);
struct sp_fragment_shader *state;
+ struct tgsi_shader_info info;
+
+ tgsi_scan_shader(templ->tokens, &info);
if (softpipe->dump_fs)
tgsi_dump(templ->tokens, 0);
state = softpipe_create_fs_llvm( softpipe, templ );
- if (state)
- return state;
-
- state = softpipe_create_fs_sse( softpipe, templ );
- if (state)
- return state;
-
- state = softpipe_create_fs_exec( softpipe, templ );
-
+ if (!state) {
+ state = softpipe_create_fs_sse( softpipe, templ );
+ if (!state) {
+ state = softpipe_create_fs_exec( softpipe, templ );
+ }
+ }
assert(state);
+ state->uses_kill = (info.opcode_count[TGSI_OPCODE_KIL] ||
+ info.opcode_count[TGSI_OPCODE_KILP]);
+ state->writes_z = info.writes_z;
return state;
}
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 43d5085895f..0ced585c7f3 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -343,7 +343,7 @@ nearest_texcoord_unnorm(unsigned wrapMode, float s, unsigned size)
switch (wrapMode) {
case PIPE_TEX_WRAP_CLAMP:
i = ifloor(s);
- return CLAMP(i, 0, size-1);
+ return CLAMP(i, 0, (int) size-1);
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
/* fall-through */
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
@@ -366,7 +366,7 @@ linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size,
switch (wrapMode) {
case PIPE_TEX_WRAP_CLAMP:
/* Not exactly what the spec says, but it matches NVIDIA output */
- s = CLAMP(s - 0.5F, 0.0, (float) size - 1.0);
+ s = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f);
*i0 = ifloor(s);
*i1 = *i0 + 1;
break;
@@ -377,7 +377,7 @@ linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size,
s -= 0.5F;
*i0 = ifloor(s);
*i1 = *i0 + 1;
- if (*i1 > size - 1)
+ if (*i1 > (int) size - 1)
*i1 = size - 1;
break;
default: