summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/i915/i915_batchbuffer.h2
-rw-r--r--src/gallium/drivers/i915/i915_context.h6
-rw-r--r--src/gallium/drivers/i915/i915_fpc_translate.c254
-rw-r--r--src/gallium/drivers/i915/i915_resource_texture.c3
-rw-r--r--src/gallium/drivers/i915/i915_screen.c5
-rw-r--r--src/gallium/drivers/i915/i915_state_derived.c31
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile2
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c2
-rw-r--r--src/gallium/drivers/nvfx/nvfx_surface.c4
-rw-r--r--src/gallium/drivers/nvfx/nvfx_transfer.c2
-rw-r--r--src/gallium/drivers/r300/Makefile3
-rw-r--r--src/gallium/drivers/r300/r300_chipset.c474
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h14
-rw-r--r--src/gallium/drivers/r300/r300_screen.c8
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c2
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c158
-rw-r--r--src/gallium/drivers/r600/evergreend.h12
-rw-r--r--src/gallium/drivers/r600/r600.h70
-rw-r--r--src/gallium/drivers/r600/r600_asm.c4
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h30
-rw-r--r--src/gallium/drivers/r600/r600_state.c94
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c25
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c338
-rw-r--r--src/gallium/drivers/svga/svga_cmd.c12
25 files changed, 869 insertions, 690 deletions
diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h
index 78554034781..14ae749c828 100644
--- a/src/gallium/drivers/i915/i915_batchbuffer.h
+++ b/src/gallium/drivers/i915/i915_batchbuffer.h
@@ -95,7 +95,7 @@ static INLINE int
i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch,
struct i915_winsys_buffer *buffer,
enum i915_winsys_buffer_usage usage,
- size_t offset, bool fenced)
+ size_t offset, boolean fenced)
{
return batch->iws->batchbuffer_reloc(batch, buffer, usage, offset, fenced);
}
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index dacf50e870d..964948edc0e 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -124,6 +124,12 @@ struct i915_fragment_shader
* Else, the bitmask indicates which components are occupied by immediates.
*/
ubyte constant_flags[I915_MAX_CONSTANT];
+
+ /**
+ * The mapping between generics and hw texture coords.
+ * We need to share this between the vertex and fragment stages.
+ **/
+ int generic_mapping[I915_TEX_UNITS];
};
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index b145b58be30..27f100843bf 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -133,7 +133,21 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
p->error = 1;
}
-
+static uint get_mapping(struct i915_fragment_shader* fs, int unit)
+{
+ int i;
+ for (i = 0; i < I915_TEX_UNITS; i++)
+ {
+ if (fs->generic_mapping[i] == -1) {
+ fs->generic_mapping[i] = unit;
+ return i;
+ }
+ if (fs->generic_mapping[i] == unit)
+ return i;
+ }
+ debug_printf("Exceeded max generics\n");
+ return 0;
+}
/**
* Construct a ureg for the given source register. Will emit
@@ -141,7 +155,8 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
*/
static uint
src_vector(struct i915_fp_compile *p,
- const struct tgsi_full_src_register *source)
+ const struct tgsi_full_src_register *source,
+ struct i915_fragment_shader* fs)
{
uint index = source->Register.Index;
uint src = 0, sem_name, sem_ind;
@@ -192,9 +207,11 @@ src_vector(struct i915_fp_compile *p,
src = swizzle(src, W, W, W, W);
break;
case TGSI_SEMANTIC_GENERIC:
- /* usually a texcoord */
- src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL);
- break;
+ {
+ int real_tex_unit = get_mapping(fs, sem_ind);
+ src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL);
+ break;
+ }
default:
i915_program_error(p, "Bad source->Index");
return 0;
@@ -336,13 +353,14 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex)
static void
emit_tex(struct i915_fp_compile *p,
const struct tgsi_full_instruction *inst,
- uint opcode)
+ uint opcode,
+ struct i915_fragment_shader* fs)
{
uint texture = inst->Texture.Texture;
uint unit = inst->Src[1].Register.Index;
uint tex = translate_tex_src_target( p, texture );
uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
- uint coord = src_vector( p, &inst->Src[0]);
+ uint coord = src_vector( p, &inst->Src[0], fs);
i915_emit_texld( p,
get_result_vector( p, &inst->Dst[0] ),
@@ -361,15 +379,16 @@ emit_tex(struct i915_fp_compile *p,
static void
emit_simple_arith(struct i915_fp_compile *p,
const struct tgsi_full_instruction *inst,
- uint opcode, uint numArgs)
+ uint opcode, uint numArgs,
+ struct i915_fragment_shader* fs)
{
uint arg1, arg2, arg3;
assert(numArgs <= 3);
- arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0] );
- arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1] );
- arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2] );
+ arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs );
+ arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs );
+ arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2], fs );
i915_emit_arith( p,
opcode,
@@ -385,7 +404,8 @@ emit_simple_arith(struct i915_fp_compile *p,
static void
emit_simple_arith_swap2(struct i915_fp_compile *p,
const struct tgsi_full_instruction *inst,
- uint opcode, uint numArgs)
+ uint opcode, uint numArgs,
+ struct i915_fragment_shader* fs)
{
struct tgsi_full_instruction inst2;
@@ -396,7 +416,7 @@ emit_simple_arith_swap2(struct i915_fp_compile *p,
inst2.Src[0] = inst->Src[1];
inst2.Src[1] = inst->Src[0];
- emit_simple_arith(p, &inst2, opcode, numArgs);
+ emit_simple_arith(p, &inst2, opcode, numArgs, fs);
}
@@ -415,7 +435,8 @@ emit_simple_arith_swap2(struct i915_fp_compile *p,
*/
static void
i915_translate_instruction(struct i915_fp_compile *p,
- const struct tgsi_full_instruction *inst)
+ const struct tgsi_full_instruction *inst,
+ struct i915_fragment_shader *fs)
{
uint writemask;
uint src0, src1, src2, flags;
@@ -423,7 +444,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ABS:
- src0 = src_vector(p, &inst->Src[0]);
+ src0 = src_vector(p, &inst->Src[0], fs);
i915_emit_arith(p,
A0_MAX,
get_result_vector(p, &inst->Dst[0]),
@@ -432,13 +453,13 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_ADD:
- emit_simple_arith(p, inst, A0_ADD, 2);
+ emit_simple_arith(p, inst, A0_ADD, 2, fs);
break;
case TGSI_OPCODE_CMP:
- src0 = src_vector(p, &inst->Src[0]);
- src1 = src_vector(p, &inst->Src[1]);
- src2 = src_vector(p, &inst->Src[2]);
+ src0 = src_vector(p, &inst->Src[0], fs);
+ src1 = src_vector(p, &inst->Src[1], fs);
+ src2 = src_vector(p, &inst->Src[2], fs);
i915_emit_arith(p, A0_CMP,
get_result_vector(p, &inst->Dst[0]),
get_result_flags(inst),
@@ -446,7 +467,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_COS:
- src0 = src_vector(p, &inst->Src[0]);
+ src0 = src_vector(p, &inst->Src[0], fs);
tmp = i915_get_utemp(p);
i915_emit_arith(p,
@@ -495,17 +516,28 @@ i915_translate_instruction(struct i915_fp_compile *p,
i915_emit_const4fv(p, cos_constants), 0);
break;
+ case TGSI_OPCODE_DP2:
+ src0 = src_vector(p, &inst->Src[0], fs);
+ src1 = src_vector(p, &inst->Src[1], fs);
+
+ i915_emit_arith(p,
+ A0_DP3,
+ get_result_vector(p, &inst->Dst[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, X, Y, ZERO, ZERO), src1, 0);
+ break;
+
case TGSI_OPCODE_DP3:
- emit_simple_arith(p, inst, A0_DP3, 2);
+ emit_simple_arith(p, inst, A0_DP3, 2, fs);
break;
case TGSI_OPCODE_DP4:
- emit_simple_arith(p, inst, A0_DP4, 2);
+ emit_simple_arith(p, inst, A0_DP4, 2, fs);
break;
case TGSI_OPCODE_DPH:
- src0 = src_vector(p, &inst->Src[0]);
- src1 = src_vector(p, &inst->Src[1]);
+ src0 = src_vector(p, &inst->Src[0], fs);
+ src1 = src_vector(p, &inst->Src[1], fs);
i915_emit_arith(p,
A0_DP4,
@@ -515,8 +547,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_DST:
- src0 = src_vector(p, &inst->Src[0]);
- src1 = src_vector(p, &inst->Src[1]);
+ src0 = src_vector(p, &inst->Src[0], fs);
+ src1 = src_vector(p, &inst->Src[1], fs);
/* result[0] = 1 * 1;
* result[1] = a[1] * b[1];
@@ -536,7 +568,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_EX2:
- src0 = src_vector(p, &inst->Src[0]);
+ src0 = src_vector(p, &inst->Src[0], fs);
i915_emit_arith(p,
A0_EXP,
@@ -546,16 +578,16 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_FLR:
- emit_simple_arith(p, inst, A0_FLR, 1);
+ emit_simple_arith(p, inst, A0_FLR, 1, fs);
break;
case TGSI_OPCODE_FRC:
- emit_simple_arith(p, inst, A0_FRC, 1);
+ emit_simple_arith(p, inst, A0_FRC, 1, fs);
break;
case TGSI_OPCODE_KIL:
/* kill if src[0].x < 0 || src[0].y < 0 ... */
- src0 = src_vector(p, &inst->Src[0]);
+ src0 = src_vector(p, &inst->Src[0], fs);
tmp = i915_get_utemp(p);
i915_emit_texld(p,
@@ -571,7 +603,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_LG2:
- src0 = src_vector(p, &inst->Src[0]);
+ src0 = src_vector(p, &inst->Src[0], fs);
i915_emit_arith(p,
A0_LOG,
@@ -581,7 +613,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_LIT:
- src0 = src_vector(p, &inst->Src[0]);
+ src0 = src_vector(p, &inst->Src[0], fs);
tmp = i915_get_utemp(p);
/* tmp = max( a.xyzw, a.00zw )
@@ -614,9 +646,9 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_LRP:
- src0 = src_vector(p, &inst->Src[0]);
- src1 = src_vector(p, &inst->Src[1]);
- src2 = src_vector(p, &inst->Src[2]);
+ src0 = src_vector(p, &inst->Src[0], fs);
+ src1 = src_vector(p, &inst->Src[1], fs);
+ src2 = src_vector(p, &inst->Src[2], fs);
flags = get_result_flags(inst);
tmp = i915_get_utemp(p);
@@ -636,16 +668,16 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_MAD:
- emit_simple_arith(p, inst, A0_MAD, 3);
+ emit_simple_arith(p, inst, A0_MAD, 3, fs);
break;
case TGSI_OPCODE_MAX:
- emit_simple_arith(p, inst, A0_MAX, 2);
+ emit_simple_arith(p, inst, A0_MAX, 2, fs);
break;
case TGSI_OPCODE_MIN:
- src0 = src_vector(p, &inst->Src[0]);
- src1 = src_vector(p, &inst->Src[1]);
+ src0 = src_vector(p, &inst->Src[0], fs);
+ src1 = src_vector(p, &inst->Src[1], fs);
tmp = i915_get_utemp(p);
flags = get_result_flags(inst);
@@ -662,16 +694,16 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_MOV:
- emit_simple_arith(p, inst, A0_MOV, 1);
+ emit_simple_arith(p, inst, A0_MOV, 1, fs);
break;
case TGSI_OPCODE_MUL:
- emit_simple_arith(p, inst, A0_MUL, 2);
+ emit_simple_arith(p, inst, A0_MUL, 2, fs);
break;
case TGSI_OPCODE_POW:
- src0 = src_vector(p, &inst->Src[0]);
- src1 = src_vector(p, &inst->Src[1]);
+ src0 = src_vector(p, &inst->Src[0], fs);
+ src1 = src_vector(p, &inst->Src[1], fs);
tmp = i915_get_utemp(p);
flags = get_result_flags(inst);
@@ -695,17 +727,17 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_RCP:
- src0 = src_vector(p, &inst->Src[0]);
+ src0 = src_vector(p, &inst->Src[0], fs);
i915_emit_arith(p,
A0_RCP,
get_result_vector(p, &inst->Dst[0]),
- get_result_flags(inst), 0,
+ get_result_flags(inst), 0,
swizzle(src0, X, X, X, X), 0, 0);
break;
case TGSI_OPCODE_RSQ:
- src0 = src_vector(p, &inst->Src[0]);
+ src0 = src_vector(p, &inst->Src[0], fs);
i915_emit_arith(p,
A0_RSQ,
@@ -715,7 +747,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_SCS:
- src0 = src_vector(p, &inst->Src[0]);
+ src0 = src_vector(p, &inst->Src[0], fs);
tmp = i915_get_utemp(p);
/*
@@ -778,17 +810,40 @@ i915_translate_instruction(struct i915_fp_compile *p,
}
break;
- case TGSI_OPCODE_SGE:
- emit_simple_arith(p, inst, A0_SGE, 2);
+ case TGSI_OPCODE_SEQ:
+ /* if we're both >= and <= then we're == */
+ src0 = src_vector(p, &inst->Src[0], fs);
+ src1 = src_vector(p, &inst->Src[1], fs);
+ tmp = i915_get_utemp(p);
+
+ i915_emit_arith(p,
+ A0_SGE,
+ tmp, A0_DEST_CHANNEL_ALL, 0,
+ src0,
+ src1, 0);
+
+ i915_emit_arith(p,
+ A0_SGE,
+ get_result_vector(p, &inst->Dst[0]),
+ A0_DEST_CHANNEL_ALL, 0,
+ src1,
+ src0, 0);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ get_result_vector(p, &inst->Dst[0]),
+ A0_DEST_CHANNEL_ALL, 0,
+ get_result_vector(p, &inst->Dst[0]),
+ tmp, 0);
+
break;
- case TGSI_OPCODE_SLE:
- /* like SGE, but swap reg0, reg1 */
- emit_simple_arith_swap2(p, inst, A0_SGE, 2);
+ case TGSI_OPCODE_SGE:
+ emit_simple_arith(p, inst, A0_SGE, 2, fs);
break;
case TGSI_OPCODE_SIN:
- src0 = src_vector(p, &inst->Src[0]);
+ src0 = src_vector(p, &inst->Src[0], fs);
tmp = i915_get_utemp(p);
i915_emit_arith(p,
@@ -837,18 +892,78 @@ i915_translate_instruction(struct i915_fp_compile *p,
i915_emit_const4fv(p, sin_constants), 0);
break;
+ case TGSI_OPCODE_SLE:
+ /* like SGE, but swap reg0, reg1 */
+ emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);
+ break;
+
case TGSI_OPCODE_SLT:
- emit_simple_arith(p, inst, A0_SLT, 2);
+ emit_simple_arith(p, inst, A0_SLT, 2, fs);
break;
case TGSI_OPCODE_SGT:
/* like SLT, but swap reg0, reg1 */
- emit_simple_arith_swap2(p, inst, A0_SLT, 2);
+ emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
+ break;
+
+ case TGSI_OPCODE_SNE:
+ /* if we're neither < nor > then we're != */
+ src0 = src_vector(p, &inst->Src[0], fs);
+ src1 = src_vector(p, &inst->Src[1], fs);
+ tmp = i915_get_utemp(p);
+
+ i915_emit_arith(p,
+ A0_SLT,
+ tmp,
+ A0_DEST_CHANNEL_ALL, 0,
+ src0,
+ src1, 0);
+
+ i915_emit_arith(p,
+ A0_SLT,
+ get_result_vector(p, &inst->Dst[0]),
+ A0_DEST_CHANNEL_ALL, 0,
+ src1,
+ src0, 0);
+
+ i915_emit_arith(p,
+ A0_ADD,
+ get_result_vector(p, &inst->Dst[0]),
+ A0_DEST_CHANNEL_ALL, 0,
+ get_result_vector(p, &inst->Dst[0]),
+ tmp, 0);
+ break;
+
+ case TGSI_OPCODE_SSG:
+ /* compute (src>0) - (src<0) */
+ src0 = src_vector(p, &inst->Src[0], fs);
+ tmp = i915_get_utemp(p);
+
+ i915_emit_arith(p,
+ A0_SLT,
+ tmp,
+ A0_DEST_CHANNEL_ALL, 0,
+ src0,
+ swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0);
+
+ i915_emit_arith(p,
+ A0_SLT,
+ get_result_vector(p, &inst->Dst[0]),
+ A0_DEST_CHANNEL_ALL, 0,
+ swizzle(src0, ZERO, ZERO, ZERO, ZERO),
+ src0, 0);
+
+ i915_emit_arith(p,
+ A0_ADD,
+ get_result_vector(p, &inst->Dst[0]),
+ A0_DEST_CHANNEL_ALL, 0,
+ get_result_vector(p, &inst->Dst[0]),
+ negate(tmp, 1, 1, 1, 1), 0);
break;
case TGSI_OPCODE_SUB:
- src0 = src_vector(p, &inst->Src[0]);
- src1 = src_vector(p, &inst->Src[1]);
+ src0 = src_vector(p, &inst->Src[0], fs);
+ src1 = src_vector(p, &inst->Src[1], fs);
i915_emit_arith(p,
A0_ADD,
@@ -858,15 +973,19 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_TEX:
- emit_tex(p, inst, T0_TEXLD);
+ emit_tex(p, inst, T0_TEXLD, fs);
+ break;
+
+ case TGSI_OPCODE_TRUNC:
+ emit_simple_arith(p, inst, A0_TRC, 1, fs);
break;
case TGSI_OPCODE_TXB:
- emit_tex(p, inst, T0_TEXLDB);
+ emit_tex(p, inst, T0_TEXLDB, fs);
break;
case TGSI_OPCODE_TXP:
- emit_tex(p, inst, T0_TEXLDP);
+ emit_tex(p, inst, T0_TEXLDP, fs);
break;
case TGSI_OPCODE_XPD:
@@ -876,8 +995,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
* result.z = src0.x * src1.y - src0.y * src1.x;
* result.w = undef;
*/
- src0 = src_vector(p, &inst->Src[0]);
- src1 = src_vector(p, &inst->Src[1]);
+ src0 = src_vector(p, &inst->Src[0], fs);
+ src1 = src_vector(p, &inst->Src[1], fs);
tmp = i915_get_utemp(p);
i915_emit_arith(p,
@@ -912,7 +1031,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
*/
static void
i915_translate_instructions(struct i915_fp_compile *p,
- const struct tgsi_token *tokens)
+ const struct tgsi_token *tokens,
+ struct i915_fragment_shader *fs)
{
struct i915_fragment_shader *ifs = p->shader;
struct tgsi_parse_context parse;
@@ -993,7 +1113,7 @@ i915_translate_instructions(struct i915_fp_compile *p,
p->first_instruction = FALSE;
}
- i915_translate_instruction(p, &parse.FullToken.FullInstruction);
+ i915_translate_instruction(p, &parse.FullToken.FullInstruction, fs);
break;
default:
@@ -1011,6 +1131,7 @@ i915_init_compile(struct i915_context *i915,
struct i915_fragment_shader *ifs)
{
struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
+ int i;
p->shader = ifs;
@@ -1023,6 +1144,9 @@ i915_init_compile(struct i915_context *i915,
ifs->num_constants = 0;
memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
+ for (i = 0; i < I915_TEX_UNITS; i++)
+ ifs->generic_mapping[i] = -1;
+
p->first_instruction = TRUE;
p->nr_tex_indirect = 1; /* correct? */
@@ -1192,7 +1316,7 @@ i915_translate_fragment_program( struct i915_context *i915,
p = i915_init_compile(i915, fs);
i915_find_wpos_space(p);
- i915_translate_instructions(p, tokens);
+ i915_translate_instructions(p, tokens, fs);
i915_fixup_depth_write(p);
i915_fini_compile(i915, p);
diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c
index e05b059706d..b74b19d0fe4 100644
--- a/src/gallium/drivers/i915/i915_resource_texture.c
+++ b/src/gallium/drivers/i915/i915_resource_texture.c
@@ -700,7 +700,8 @@ i915_texture_destroy(struct pipe_screen *screen,
struct i915_winsys *iws = i915_screen(screen)->iws;
uint i;
- iws->buffer_destroy(iws, tex->buffer);
+ if (tex->buffer)
+ iws->buffer_destroy(iws, tex->buffer);
for (i = 0; i < Elements(tex->image_offset); i++)
if (tex->image_offset[i])
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index da96b420f2c..c86baa58b28 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -134,6 +134,11 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_TEXTURE_SWIZZLE:
case PIPE_CAP_TIMER_QUERY:
+ case PIPE_CAP_SM3:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL:
+ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
return 0;
/* Features we can lie about (boolean caps). */
diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c
index 59ac2f7292a..bf6b30a4530 100644
--- a/src/gallium/drivers/i915/i915_state_derived.c
+++ b/src/gallium/drivers/i915/i915_state_derived.c
@@ -35,6 +35,18 @@
#include "i915_debug.h"
#include "i915_reg.h"
+static uint find_mapping(struct i915_fragment_shader* fs, int unit)
+{
+ int i;
+ for (i = 0; i < I915_TEX_UNITS ; i++)
+ {
+ if (fs->generic_mapping[i] == unit)
+ return i;
+ }
+ debug_printf("Mapping not found\n");
+ return 0;
+}
+
/***********************************************************************
@@ -46,7 +58,7 @@ static void calculate_vertex_layout(struct i915_context *i915)
const struct i915_fragment_shader *fs = i915->fs;
const enum interp_mode colorInterp = i915->rasterizer->color_interp;
struct vertex_info vinfo;
- boolean texCoords[8], colors[2], fog, needW;
+ boolean texCoords[I915_TEX_UNITS], colors[2], fog, needW;
uint i;
int src;
@@ -66,11 +78,12 @@ static void calculate_vertex_layout(struct i915_context *i915)
colors[fs->info.input_semantic_index[i]] = TRUE;
break;
case TGSI_SEMANTIC_GENERIC:
- /* usually a texcoord */
{
- const uint unit = fs->info.input_semantic_index[i];
- assert(unit < 8);
- texCoords[unit] = TRUE;
+ /* texcoords/varyings/other generic */
+ /* XXX handle back/front face and point size */
+ uint unit = fs->info.input_semantic_index[i];
+
+ texCoords[find_mapping(fs, unit)] = TRUE;
needW = TRUE;
}
break;
@@ -82,7 +95,7 @@ static void calculate_vertex_layout(struct i915_context *i915)
}
}
-
+
/* pos */
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
if (needW) {
@@ -120,12 +133,12 @@ static void calculate_vertex_layout(struct i915_context *i915)
vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM;
}
- /* texcoords */
- for (i = 0; i < 8; i++) {
+ /* texcoords/varyings */
+ for (i = 0; i < I915_TEX_UNITS; i++) {
uint hwtc;
if (texCoords[i]) {
hwtc = TEXCOORDFMT_4D;
- src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, fs->generic_mapping[i]);
draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
}
else {
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 4068bed393c..ba9705bebee 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -71,7 +71,7 @@ lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxil
LDFLAGS += $(LLVM_LDFLAGS)
LIBS += -L../../auxiliary/ -lgallium libllvmpipe.a $(LLVM_LIBS) $(GL_LIB_DEPS)
-LD=g++
+LD=$(CXX)
$(PROGS): lp_test_main.o libllvmpipe.a
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 8d75dd07c4d..fb125f3a8d8 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -508,7 +508,7 @@ nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
{
unsigned s, i;
- for (s = 0; s < 5; ++s)
+ for (s = 0; s < 3; ++s)
for (i = 0; i < nv50_context(pipe)->num_samplers[s]; ++i)
if (nv50_context(pipe)->samplers[s][i] == hwcso)
nv50_context(pipe)->samplers[s][i] = NULL;
diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c
index ced26494e15..339906e6a63 100644
--- a/src/gallium/drivers/nvfx/nvfx_surface.c
+++ b/src/gallium/drivers/nvfx/nvfx_surface.c
@@ -84,7 +84,7 @@ nvfx_region_set_format(struct nv04_region* rgn, enum pipe_format format)
}
static INLINE void
-nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, bool for_write)
+nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, boolean for_write)
{
rgn->x = x;
rgn->y = y;
@@ -120,7 +120,7 @@ nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf,
}
static INLINE void
-nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, unsigned level, unsigned x, unsigned y, unsigned z, bool for_write)
+nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, unsigned level, unsigned x, unsigned y, unsigned z, boolean for_write)
{
if(pt->target != PIPE_BUFFER)
{
diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c
index 2debcb6eb8f..cc4b51ec1f8 100644
--- a/src/gallium/drivers/nvfx/nvfx_transfer.c
+++ b/src/gallium/drivers/nvfx/nvfx_transfer.c
@@ -56,7 +56,7 @@ nvfx_transfer_new(struct pipe_context *pipe,
else
{
struct nvfx_staging_transfer* tx;
- bool direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR;
+ boolean direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR;
tx = CALLOC_STRUCT(nvfx_staging_transfer);
if(!tx)
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index 37b0f01cfd3..dfedf353877 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -30,7 +30,8 @@ C_SOURCES = \
r300_transfer.c
LIBRARY_INCLUDES = \
- -I$(TOP)/src/mesa/drivers/dri/r300/compiler
+ -I$(TOP)/src/mesa/drivers/dri/r300/compiler \
+ -I$(TOP)/include
COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index 4949703120d..571986c3011 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -1,5 +1,6 @@
/*
* Copyright 2008 Corbin Simpson <[email protected]>
+ * Copyright 2011 Marek Olšák <[email protected]>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -32,392 +33,109 @@
/* Parse a PCI ID and fill an r300_capabilities struct with information. */
void r300_parse_chipset(struct r300_capabilities* caps)
{
- /* Reasonable defaults */
- caps->num_vert_fpus = 2;
- caps->num_tex_units = 16;
- caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE;
- caps->hiz_ram = 0;
- caps->is_r400 = FALSE;
- caps->is_r500 = FALSE;
- caps->high_second_pipe = FALSE;
-
- /* Note: These are not ordered by PCI ID. I leave that task to GCC,
- * which will perform the ordering while collating jump tables. Instead,
- * I've tried to group them according to capabilities and age. */
switch (caps->pci_id) {
- case 0x4144:
- caps->family = CHIP_FAMILY_R300;
- caps->high_second_pipe = TRUE;
- caps->num_vert_fpus = 4;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
-
- case 0x4145:
- case 0x4146:
- case 0x4147:
- case 0x4E44:
- case 0x4E45:
- case 0x4E46:
- case 0x4E47:
- caps->family = CHIP_FAMILY_R300;
- caps->high_second_pipe = TRUE;
- caps->num_vert_fpus = 4;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
-
- case 0x4150:
- case 0x4151:
- case 0x4152:
- case 0x4153:
- case 0x4154:
- case 0x4155:
- case 0x4156:
- case 0x4E50:
- case 0x4E51:
- case 0x4E52:
- case 0x4E53:
- case 0x4E54:
- case 0x4E56:
- caps->family = CHIP_FAMILY_RV350;
- caps->high_second_pipe = TRUE;
- caps->zmask_ram = RV3xx_ZMASK_SIZE;
- break;
-
- case 0x4148:
- case 0x4149:
- case 0x414A:
- case 0x414B:
- case 0x4E48:
- case 0x4E49:
- case 0x4E4B:
- caps->family = CHIP_FAMILY_R350;
- caps->high_second_pipe = TRUE;
- caps->num_vert_fpus = 4;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
-
- case 0x4E4A:
- caps->family = CHIP_FAMILY_R360;
- caps->high_second_pipe = TRUE;
- caps->num_vert_fpus = 4;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
-
- case 0x5460:
- case 0x5462:
- case 0x5464:
- case 0x5B60:
- case 0x5B62:
- case 0x5B63:
- case 0x5B64:
- case 0x5B65:
- caps->family = CHIP_FAMILY_RV370;
- caps->high_second_pipe = TRUE;
- caps->zmask_ram = RV3xx_ZMASK_SIZE;
- break;
-
- case 0x3150:
- case 0x3152:
- case 0x3154:
- case 0x3155:
- case 0x3E50:
- case 0x3E54:
- caps->family = CHIP_FAMILY_RV380;
- caps->high_second_pipe = TRUE;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = RV3xx_ZMASK_SIZE;
- break;
-
- case 0x4A48:
- case 0x4A49:
- case 0x4A4A:
- case 0x4A4B:
- case 0x4A4C:
- case 0x4A4D:
- case 0x4A4E:
- case 0x4A4F:
- case 0x4A50:
- case 0x4A54:
- caps->family = CHIP_FAMILY_R420;
- caps->num_vert_fpus = 6;
- caps->is_r400 = TRUE;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
-
- case 0x5548:
- case 0x5549:
- case 0x554A:
- case 0x554B:
- case 0x5550:
- case 0x5551:
- case 0x5552:
- case 0x5554:
- case 0x5D57:
- caps->family = CHIP_FAMILY_R423;
- caps->num_vert_fpus = 6;
- caps->is_r400 = TRUE;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
-
- case 0x554C:
- case 0x554D:
- case 0x554E:
- case 0x554F:
- case 0x5D48:
- case 0x5D49:
- case 0x5D4A:
- caps->family = CHIP_FAMILY_R430;
- caps->num_vert_fpus = 6;
- caps->is_r400 = TRUE;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
-
- case 0x5D4C:
- case 0x5D4D:
- case 0x5D4E:
- case 0x5D4F:
- case 0x5D50:
- case 0x5D52:
- caps->family = CHIP_FAMILY_R480;
- caps->num_vert_fpus = 6;
- caps->is_r400 = TRUE;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
-
- case 0x4B48:
- case 0x4B49:
- case 0x4B4A:
- case 0x4B4B:
- case 0x4B4C:
- caps->family = CHIP_FAMILY_R481;
- caps->num_vert_fpus = 6;
- caps->is_r400 = TRUE;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
-
- case 0x5E4C:
- case 0x5E4F:
- case 0x564A:
- case 0x564B:
- case 0x564F:
- case 0x5652:
- case 0x5653:
- case 0x5657:
- case 0x5E48:
- case 0x5E4A:
- case 0x5E4B:
- case 0x5E4D:
- caps->family = CHIP_FAMILY_RV410;
- caps->num_vert_fpus = 6;
- caps->is_r400 = TRUE;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
-
- case 0x5954:
- case 0x5955:
- caps->family = CHIP_FAMILY_RS480;
- caps->has_tcl = FALSE;
- caps->zmask_ram = RV3xx_ZMASK_SIZE;
+#define CHIPSET(pci_id, name, chipfamily) \
+ case pci_id: \
+ caps->family = CHIP_FAMILY_##chipfamily; \
break;
+#include "pci_ids/r300_pci_ids.h"
+#undef CHIPSET
- case 0x5974:
- case 0x5975:
- caps->family = CHIP_FAMILY_RS482;
- caps->has_tcl = FALSE;
- caps->zmask_ram = RV3xx_ZMASK_SIZE;
- break;
-
- case 0x5A41:
- case 0x5A42:
- caps->family = CHIP_FAMILY_RS400;
- caps->has_tcl = FALSE;
- break;
-
- case 0x5A61:
- case 0x5A62:
- caps->family = CHIP_FAMILY_RC410;
- caps->has_tcl = FALSE;
- caps->zmask_ram = RV3xx_ZMASK_SIZE;
- break;
-
- case 0x791E:
- case 0x791F:
- caps->family = CHIP_FAMILY_RS690;
- caps->has_tcl = FALSE;
- caps->is_r400 = TRUE;
- break;
-
- case 0x793F:
- case 0x7941:
- case 0x7942:
- caps->family = CHIP_FAMILY_RS600;
- caps->has_tcl = FALSE;
- caps->is_r400 = TRUE;
- break;
-
- case 0x796C:
- case 0x796D:
- case 0x796E:
- case 0x796F:
- caps->family = CHIP_FAMILY_RS740;
- caps->has_tcl = FALSE;
- caps->is_r400 = TRUE;
- break;
-
- case 0x7100:
- case 0x7101:
- case 0x7102:
- case 0x7103:
- case 0x7104:
- case 0x7105:
- case 0x7106:
- case 0x7108:
- case 0x7109:
- case 0x710A:
- case 0x710B:
- case 0x710C:
- case 0x710E:
- case 0x710F:
- caps->family = CHIP_FAMILY_R520;
- caps->num_vert_fpus = 8;
- caps->is_r500 = TRUE;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
-
- case 0x7140:
- case 0x7141:
- case 0x7142:
- case 0x7143:
- case 0x7144:
- case 0x7145:
- case 0x7146:
- case 0x7147:
- case 0x7149:
- case 0x714A:
- case 0x714B:
- case 0x714C:
- case 0x714D:
- case 0x714E:
- case 0x714F:
- case 0x7151:
- case 0x7152:
- case 0x7153:
- case 0x715E:
- case 0x715F:
- case 0x7180:
- case 0x7181:
- case 0x7183:
- case 0x7186:
- case 0x7187:
- case 0x7188:
- case 0x718A:
- case 0x718B:
- case 0x718C:
- case 0x718D:
- case 0x718F:
- case 0x7193:
- case 0x7196:
- case 0x719B:
- case 0x719F:
- case 0x7200:
- case 0x7210:
- case 0x7211:
- caps->family = CHIP_FAMILY_RV515;
- caps->num_vert_fpus = 2;
- caps->is_r500 = TRUE;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
-
- case 0x71C0:
- case 0x71C1:
- case 0x71C2:
- case 0x71C3:
- case 0x71C4:
- case 0x71C5:
- case 0x71C6:
- case 0x71C7:
- case 0x71CD:
- case 0x71CE:
- case 0x71D2:
- case 0x71D4:
- case 0x71D5:
- case 0x71D6:
- case 0x71DA:
- case 0x71DE:
- caps->family = CHIP_FAMILY_RV530;
- caps->num_vert_fpus = 5;
- caps->is_r500 = TRUE;
- caps->hiz_ram = RV530_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
-
- case 0x7240:
- case 0x7243:
- case 0x7244:
- case 0x7245:
- case 0x7246:
- case 0x7247:
- case 0x7248:
- case 0x7249:
- case 0x724A:
- case 0x724B:
- case 0x724C:
- case 0x724D:
- case 0x724E:
- case 0x724F:
- case 0x7284:
- caps->family = CHIP_FAMILY_R580;
- caps->num_vert_fpus = 8;
- caps->is_r500 = TRUE;
- caps->hiz_ram = RV530_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
-
- case 0x7280:
- caps->family = CHIP_FAMILY_RV570;
- caps->num_vert_fpus = 8;
- caps->is_r500 = TRUE;
- caps->hiz_ram = RV530_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
-
- case 0x7281:
- case 0x7283:
- case 0x7287:
- case 0x7288:
- case 0x7289:
- case 0x728B:
- case 0x728C:
- case 0x7290:
- case 0x7291:
- case 0x7293:
- case 0x7297:
- caps->family = CHIP_FAMILY_RV560;
- caps->num_vert_fpus = 8;
- caps->is_r500 = TRUE;
- caps->hiz_ram = RV530_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
- break;
+ default:
+ fprintf(stderr, "r300: Warning: Unknown chipset 0x%x\nAborting...",
+ caps->pci_id);
+ abort();
+ }
- default:
- fprintf(stderr, "r300: Warning: Unknown chipset 0x%x\n",
- caps->pci_id);
+ /* Defaults. */
+ caps->high_second_pipe = FALSE;
+ caps->num_vert_fpus = 0;
+ caps->hiz_ram = 0;
+ caps->zmask_ram = 0;
+
+
+ switch (caps->family) {
+ case CHIP_FAMILY_R300:
+ case CHIP_FAMILY_R350:
+ caps->high_second_pipe = TRUE;
+ caps->num_vert_fpus = 4;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
+ break;
+
+ case CHIP_FAMILY_RV350:
+ case CHIP_FAMILY_RV370:
+ caps->high_second_pipe = TRUE;
+ caps->num_vert_fpus = 2;
+ caps->zmask_ram = RV3xx_ZMASK_SIZE;
+ break;
+
+ case CHIP_FAMILY_RV380:
+ caps->high_second_pipe = TRUE;
+ caps->num_vert_fpus = 2;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = RV3xx_ZMASK_SIZE;
+ break;
+
+ case CHIP_FAMILY_RS400:
+ case CHIP_FAMILY_RS600:
+ case CHIP_FAMILY_RS690:
+ case CHIP_FAMILY_RS740:
+ break;
+
+ case CHIP_FAMILY_RC410:
+ case CHIP_FAMILY_RS480:
+ caps->zmask_ram = RV3xx_ZMASK_SIZE;
+ break;
+
+ case CHIP_FAMILY_R420:
+ case CHIP_FAMILY_R423:
+ case CHIP_FAMILY_R430:
+ case CHIP_FAMILY_R480:
+ case CHIP_FAMILY_R481:
+ case CHIP_FAMILY_RV410:
+ caps->num_vert_fpus = 6;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
+ break;
+
+ case CHIP_FAMILY_R520:
+ caps->num_vert_fpus = 8;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
+ break;
+
+ case CHIP_FAMILY_RV515:
+ caps->num_vert_fpus = 2;
+ caps->hiz_ram = R300_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
+ break;
+
+ case CHIP_FAMILY_RV530:
+ caps->num_vert_fpus = 5;
+ caps->hiz_ram = RV530_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
+ break;
+
+ case CHIP_FAMILY_R580:
+ case CHIP_FAMILY_RV560:
+ case CHIP_FAMILY_RV570:
+ caps->num_vert_fpus = 8;
+ caps->hiz_ram = RV530_HIZ_LIMIT;
+ caps->zmask_ram = PIPE_ZMASK_SIZE;
+ break;
}
+ caps->num_tex_units = 16;
+ caps->is_r400 = caps->family >= CHIP_FAMILY_R420 && caps->family < CHIP_FAMILY_RV515;
+ caps->is_r500 = caps->family >= CHIP_FAMILY_RV515;
caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350;
caps->z_compress = caps->is_rv350 ? R300_ZCOMP_8X8 : R300_ZCOMP_4X4;
caps->dxtc_swizzle = caps->is_r400 || caps->is_r500;
caps->has_us_format = caps->family == CHIP_FAMILY_R520;
+ caps->has_tcl = caps->num_vert_fpus > 0;
+
+ if (caps->has_tcl) {
+ caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE;
+ }
}
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index d0050bed2e8..4df6b5b6292 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -96,26 +96,24 @@ struct r300_capabilities {
/* Enumerations for legibility and telling which card we're running on. */
enum {
- CHIP_FAMILY_R300 = 0,
+ CHIP_FAMILY_R300 = 0, /* R3xx-based cores. */
CHIP_FAMILY_R350,
- CHIP_FAMILY_R360,
CHIP_FAMILY_RV350,
CHIP_FAMILY_RV370,
CHIP_FAMILY_RV380,
- CHIP_FAMILY_R420,
+ CHIP_FAMILY_RS400,
+ CHIP_FAMILY_RC410,
+ CHIP_FAMILY_RS480,
+ CHIP_FAMILY_R420, /* R4xx-based cores. */
CHIP_FAMILY_R423,
CHIP_FAMILY_R430,
CHIP_FAMILY_R480,
CHIP_FAMILY_R481,
CHIP_FAMILY_RV410,
- CHIP_FAMILY_RS400,
- CHIP_FAMILY_RC410,
- CHIP_FAMILY_RS480,
- CHIP_FAMILY_RS482,
CHIP_FAMILY_RS600,
CHIP_FAMILY_RS690,
CHIP_FAMILY_RS740,
- CHIP_FAMILY_RV515,
+ CHIP_FAMILY_RV515, /* R5xx-based cores. */
CHIP_FAMILY_R520,
CHIP_FAMILY_RV530,
CHIP_FAMILY_R580,
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 240b841ed2a..05af2148b38 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -49,20 +49,18 @@ static const char* r300_get_vendor(struct pipe_screen* pscreen)
static const char* chip_families[] = {
"ATI R300",
"ATI R350",
- "ATI R360",
"ATI RV350",
"ATI RV370",
"ATI RV380",
+ "ATI RS400",
+ "ATI RC410",
+ "ATI RS480",
"ATI R420",
"ATI R423",
"ATI R430",
"ATI R480",
"ATI R481",
"ATI RV410",
- "ATI RS400",
- "ATI RC410",
- "ATI RS480",
- "ATI RS482",
"ATI RS600",
"ATI RS690",
"ATI RS740",
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 04499c78cc6..121409b2260 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -438,7 +438,7 @@ static void r300_update_rs_block(struct r300_context *r300)
/* Rasterize texture coordinates. */
for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) {
- bool sprite_coord = false;
+ boolean sprite_coord = false;
if (fs_inputs->generic[i] != ATTR_UNUSED) {
sprite_coord = !!(r300->sprite_coord_enable & (1 << i));
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 54f5410c324..9ebfe54c76d 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -380,9 +380,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
struct pipe_resource *texture,
const struct pipe_sampler_view *state)
{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view);
- struct r600_pipe_state *rstate;
+ struct r600_pipe_resource_state *rstate;
const struct util_format_description *desc;
struct r600_resource_texture *tmp;
struct r600_resource *rbuffer;
@@ -438,35 +437,27 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
array_mode = tmp->array_mode[0];
tile_type = tmp->tile_type;
- r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
- S_030000_DIM(r600_tex_dim(texture->target)) |
- S_030000_PITCH((pitch / 8) - 1) |
- S_030000_NON_DISP_TILING_ORDER(tile_type) |
- S_030000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
- S_030004_TEX_HEIGHT(texture->height0 - 1) |
- S_030004_TEX_DEPTH(texture->depth0 - 1) |
- S_030004_ARRAY_MODE(array_mode),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
- (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
- r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
- (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]);
- r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
- word4 |
- S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
- S_030010_ENDIAN_SWAP(endian) |
- S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
- S_030014_LAST_LEVEL(state->u.tex.last_level) |
- S_030014_BASE_ARRAY(0) |
- S_030014_LAST_ARRAY(0), 0xffffffff, NULL);
- r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6,
- S_030018_MAX_ANISO(4 /* max 16 samples */),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7,
- S_03001C_DATA_FORMAT(format) |
- S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL);
+ rstate->bo[0] = bo[0];
+ rstate->bo[1] = bo[1];
+ rstate->val[0] = (S_030000_DIM(r600_tex_dim(texture->target)) |
+ S_030000_PITCH((pitch / 8) - 1) |
+ S_030000_NON_DISP_TILING_ORDER(tile_type) |
+ S_030000_TEX_WIDTH(texture->width0 - 1));
+ rstate->val[1] = (S_030004_TEX_HEIGHT(texture->height0 - 1) |
+ S_030004_TEX_DEPTH(texture->depth0 - 1) |
+ S_030004_ARRAY_MODE(array_mode));
+ rstate->val[2] = (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8;
+ rstate->val[3] = (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8;
+ rstate->val[4] = (word4 |
+ S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
+ S_030010_ENDIAN_SWAP(endian) |
+ S_030010_BASE_LEVEL(state->u.tex.first_level));
+ rstate->val[5] = (S_030014_LAST_LEVEL(state->u.tex.last_level) |
+ S_030014_BASE_ARRAY(0) |
+ S_030014_LAST_ARRAY(0));
+ rstate->val[6] = (S_030018_MAX_ANISO(4 /* max 16 samples */));
+ rstate->val[7] = (S_03001C_DATA_FORMAT(format) |
+ S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE));
return &resource->base;
}
@@ -1032,7 +1023,10 @@ static void cayman_init_config(struct r600_pipe_context *rctx)
tmp |= S_008C00_EXPORT_SRC_C(1);
r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, (4 << 28), 0xFFFFFFFF, NULL);
+ /* always set the temp clauses */
+ r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, S_008C04_NUM_CLAUSE_TEMP_GPRS(4), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL);
@@ -1384,21 +1378,38 @@ void evergreen_init_config(struct r600_pipe_context *rctx)
tmp |= S_008C00_ES_PRIO(es_prio);
r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL);
- tmp = 0;
- tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
- tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
- tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
- r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
-
- tmp = 0;
- tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
- tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
- r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
-
- tmp = 0;
- tmp |= S_008C0C_NUM_HS_GPRS(num_hs_gprs);
- tmp |= S_008C0C_NUM_LS_GPRS(num_ls_gprs);
- r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL);
+ /* enable dynamic GPR resource management */
+ if (r600_get_minor_version(rctx->radeon) >= 7) {
+ /* always set temp clauses */
+ r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1,
+ S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
+ S_028838_PS_GPRS(0x1e) |
+ S_028838_VS_GPRS(0x1e) |
+ S_028838_GS_GPRS(0x1e) |
+ S_028838_ES_GPRS(0x1e) |
+ S_028838_HS_GPRS(0x1e) |
+ S_028838_LS_GPRS(0x1e), 0xFFFFFFFF, NULL); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
+ } else {
+ tmp = 0;
+ tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
+ tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
+ tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
+ r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+
+ tmp = 0;
+ tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
+ tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
+ r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+
+ tmp = 0;
+ tmp |= S_008C0C_NUM_HS_GPRS(num_hs_gprs);
+ tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs);
+ r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL);
+ }
tmp = 0;
tmp |= S_008C18_NUM_PS_THREADS(num_ps_threads);
@@ -1769,45 +1780,32 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx)
}
void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
- struct r600_pipe_state *rstate,
- struct r600_resource *rbuffer,
- unsigned offset, unsigned stride)
+ struct r600_pipe_resource_state *rstate)
{
rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
- r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
- offset, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
- rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
- S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
- S_030008_STRIDE(stride), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
- S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
- S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
- S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
- S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7,
- 0xC0000000, 0xFFFFFFFF, NULL);
+
+ rstate->val[0] = 0;
+ rstate->bo[0] = NULL;
+ rstate->val[1] = 0;
+ rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32));
+ rstate->val[3] = S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
+ S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
+ S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
+ S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W);
+ rstate->val[4] = 0;
+ rstate->val[5] = 0;
+ rstate->val[6] = 0;
+ rstate->val[7] = 0xc0000000;
}
-void evergreen_pipe_mod_buffer_resource(struct r600_pipe_state *rstate,
+void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
struct r600_resource *rbuffer,
unsigned offset, unsigned stride)
{
- rstate->nregs = 0;
- r600_pipe_state_mod_reg_bo(rstate, offset, rbuffer->bo);
- r600_pipe_state_mod_reg(rstate, rbuffer->bo_size - offset - 1);
- r600_pipe_state_mod_reg(rstate, S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
- S_030008_STRIDE(stride));
- rstate->nregs = 8;
-
+ rstate->bo[0] = rbuffer->bo;
+ rstate->val[0] = offset;
+ rstate->val[1] = rbuffer->bo_size - offset - 1;
+ rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
+ S_030008_STRIDE(stride);
}
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index ee0c7c9ed9b..d795f5757ed 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -171,6 +171,10 @@
#define S_008C0C_NUM_LS_GPRS(x) (((x) & 0xFF) << 16)
#define G_008C0C_NUM_LS_GPRS(x) (((x) >> 16) & 0xFF)
#define C_008C0C_NUM_LS_GPRS(x) 0xFF00FFFF
+
+#define R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 0x00008C10
+#define R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 0x00008C14
+
#define R_008C18_SQ_THREAD_RESOURCE_MGMT_1 0x00008C18
#define S_008C18_NUM_PS_THREADS(x) (((x) & 0xFF) << 0)
#define G_008C18_NUM_PS_THREADS(x) (((x) >> 0) & 0xFF)
@@ -1637,6 +1641,12 @@
#define R_028818_PA_CL_VTE_CNTL 0x00028818
#define R_028820_PA_CL_NANINF_CNTL 0x00028820
#define R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1 0x00028838
+#define S_028838_PS_GPRS(x) (((x) & 0x1F) << 0)
+#define S_028838_VS_GPRS(x) (((x) & 0x1F) << 5)
+#define S_028838_GS_GPRS(x) (((x) & 0x1F) << 10)
+#define S_028838_ES_GPRS(x) (((x) & 0x1F) << 15)
+#define S_028838_HS_GPRS(x) (((x) & 0x1F) << 20)
+#define S_028838_LS_GPRS(x) (((x) & 0x1F) << 25)
#define R_028840_SQ_PGM_START_PS 0x00028840
#define R_02884C_SQ_PGM_EXPORTS_PS 0x0002884C
#define S_02884C_EXPORT_COLORS(x) (((x) & 0xF) << 1)
@@ -1948,6 +1958,4 @@
#define CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0 0x28c38
#define CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1 0x28c3c
-#define CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 0x00008C10
-#define CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 0x00008C14
#endif
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 23e7181a86e..bf7138d9e4e 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -30,6 +30,7 @@
#include <stdint.h>
#include <stdio.h>
#include <util/u_double_list.h>
+#include <util/u_inlines.h>
#include <pipe/p_compiler.h>
#define RADEON_CTX_MAX_PM4 (64 * 1024 / 4)
@@ -47,33 +48,6 @@ struct winsys_handle;
enum radeon_family {
CHIP_UNKNOWN,
- CHIP_R100,
- CHIP_RV100,
- CHIP_RS100,
- CHIP_RV200,
- CHIP_RS200,
- CHIP_R200,
- CHIP_RV250,
- CHIP_RS300,
- CHIP_RV280,
- CHIP_R300,
- CHIP_R350,
- CHIP_RV350,
- CHIP_RV380,
- CHIP_R420,
- CHIP_R423,
- CHIP_RV410,
- CHIP_RS400,
- CHIP_RS480,
- CHIP_RS600,
- CHIP_RS690,
- CHIP_RS740,
- CHIP_RV515,
- CHIP_R520,
- CHIP_RV530,
- CHIP_RV560,
- CHIP_RV570,
- CHIP_R580,
CHIP_R600,
CHIP_RV610,
CHIP_RV630,
@@ -130,14 +104,24 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon,
unsigned handle, unsigned *array_mode);
void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx);
void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo);
-void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst,
- struct r600_bo *src);
boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo,
unsigned stride, struct winsys_handle *whandle);
static INLINE unsigned r600_bo_offset(struct r600_bo *bo)
{
return 0;
}
+void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo);
+
+/* this relies on the pipe_reference being the first member of r600_bo */
+static INLINE void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, struct r600_bo *src)
+{
+ struct r600_bo *old = *dst;
+
+ if (pipe_reference((struct pipe_reference *)(*dst), (struct pipe_reference *)src)) {
+ r600_bo_destroy(radeon, old);
+ }
+ *dst = src;
+}
/* R600/R700 STATES */
@@ -170,8 +154,17 @@ struct r600_pipe_state {
struct r600_pipe_reg regs[R600_BLOCK_MAX_REG];
};
+struct r600_pipe_resource_state {
+ unsigned id;
+ u32 val[8];
+ struct r600_bo *bo[2];
+};
+
#define R600_BLOCK_STATUS_ENABLED (1 << 0)
#define R600_BLOCK_STATUS_DIRTY (1 << 1)
+#define R600_BLOCK_STATUS_RESOURCE_DIRTY (1 << 2)
+
+#define R600_BLOCK_STATUS_RESOURCE_VERTEX (1 << 3)
struct r600_block_reloc {
struct r600_bo *bo;
@@ -182,6 +175,7 @@ struct r600_block_reloc {
struct r600_block {
struct list_head list;
+ struct list_head enable_list;
unsigned status;
unsigned flags;
unsigned start_offset;
@@ -245,6 +239,8 @@ struct r600_context {
unsigned nblocks;
struct r600_block **blocks;
struct list_head dirty;
+ struct list_head resource_dirty;
+ struct list_head enable_list;
unsigned pm4_ndwords;
unsigned pm4_cdwords;
unsigned pm4_dirty_cdwords;
@@ -261,6 +257,10 @@ struct r600_context {
unsigned num_dest_buffers;
unsigned flags;
boolean predicate_drawing;
+ struct r600_range ps_resources;
+ struct r600_range vs_resources;
+ struct r600_range fs_resources;
+ int num_ps_resources, num_vs_resources, num_fs_resources;
};
struct r600_draw {
@@ -275,9 +275,9 @@ struct r600_draw {
int r600_context_init(struct r600_context *ctx, struct radeon *radeon);
void r600_context_fini(struct r600_context *ctx);
void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state);
-void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
+void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
+void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
+void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
void r600_context_flush(struct r600_context *ctx);
@@ -303,9 +303,9 @@ void r600_context_flush_dest_caches(struct r600_context *ctx);
int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon);
void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw);
void evergreen_context_flush_dest_caches(struct r600_context *ctx);
-void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
+void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
+void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
+void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 65e539eba35..3196d97dbbb 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1088,7 +1088,7 @@ static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *al
/* Collect required cache lines. */
for (i = 0; i < 3; ++i) {
- bool found = false;
+ boolean found = false;
unsigned int line;
if (alu->src[i].sel < 512)
@@ -1140,7 +1140,7 @@ static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *al
/* Setup the kcache lines. */
for (i = 0; i < count; ++i) {
- bool found = false;
+ boolean found = false;
for (j = 0; j < 2; ++j) {
if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 8002d943abd..d92b74ebc4e 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -82,12 +82,12 @@ struct r600_screen {
struct r600_pipe_sampler_view {
struct pipe_sampler_view base;
- struct r600_pipe_state state;
+ struct r600_pipe_resource_state state;
};
struct r600_pipe_rasterizer {
struct r600_pipe_state rstate;
- bool flatshade;
+ boolean flatshade;
unsigned sprite_coord_enable;
float offset_units;
float offset_scale;
@@ -173,7 +173,7 @@ struct r600_pipe_context {
struct r600_pipe_state *states[R600_PIPE_NSTATES];
struct r600_context ctx;
struct r600_vertex_element *vertex_elements;
- struct r600_pipe_state fs_resource[PIPE_MAX_ATTRIBS];
+ struct r600_pipe_resource_state fs_resource[PIPE_MAX_ATTRIBS];
struct pipe_framebuffer_state framebuffer;
struct pipe_index_buffer index_buffer;
unsigned cb_target_mask;
@@ -185,25 +185,25 @@ struct r600_pipe_context {
struct r600_pipe_shader *ps_shader;
struct r600_pipe_shader *vs_shader;
struct r600_pipe_state vs_const_buffer;
- struct r600_pipe_state vs_const_buffer_resource[R600_MAX_CONST_BUFFERS];
+ struct r600_pipe_resource_state vs_const_buffer_resource[R600_MAX_CONST_BUFFERS];
struct r600_pipe_state ps_const_buffer;
- struct r600_pipe_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS];
+ struct r600_pipe_resource_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS];
struct r600_pipe_rasterizer *rasterizer;
struct r600_pipe_state vgt;
struct r600_pipe_state spi;
/* shader information */
unsigned sprite_coord_enable;
- bool flatshade;
- bool export_16bpc;
+ boolean flatshade;
+ boolean export_16bpc;
unsigned alpha_ref;
- bool alpha_ref_dirty;
+ boolean alpha_ref_dirty;
struct r600_textures_info ps_samplers;
struct r600_pipe_fences fences;
struct u_vbuf_mgr *vbuf_mgr;
struct util_slab_mempool pool_transfers;
- bool blit;
+ boolean blit;
};
@@ -224,10 +224,8 @@ void evergreen_fetch_shader(struct pipe_context *ctx, struct r600_vertex_element
void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx);
void evergreen_polygon_offset_update(struct r600_pipe_context *rctx);
void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
- struct r600_pipe_state *rstate,
- struct r600_resource *rbuffer,
- unsigned offset, unsigned stride);
-void evergreen_pipe_mod_buffer_resource(struct r600_pipe_state *rstate,
+ struct r600_pipe_resource_state *rstate);
+void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
struct r600_resource *rbuffer,
unsigned offset, unsigned stride);
@@ -268,10 +266,8 @@ void r600_fetch_shader(struct pipe_context *ctx, struct r600_vertex_element *ve)
void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx);
void r600_polygon_offset_update(struct r600_pipe_context *rctx);
void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
- struct r600_pipe_state *rstate,
- struct r600_resource *rbuffer,
- unsigned offset, unsigned stride);
-void r600_pipe_mod_buffer_resource(struct r600_pipe_state *rstate,
+ struct r600_pipe_resource_state *rstate);
+void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
struct r600_resource *rbuffer,
unsigned offset, unsigned stride);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 46fdbfed34a..a6cfa704ca5 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -414,7 +414,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view);
- struct r600_pipe_state *rstate;
+ struct r600_pipe_resource_state *rstate;
const struct util_format_description *desc;
struct r600_resource_texture *tmp;
struct r600_resource *rbuffer;
@@ -477,33 +477,29 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
depth = texture->array_size;
}
- r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
- S_038000_DIM(r600_tex_dim(texture->target)) |
- S_038000_TILE_MODE(array_mode) |
- S_038000_TILE_TYPE(tile_type) |
- S_038000_PITCH((pitch / 8) - 1) |
- S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
- S_038004_TEX_HEIGHT(height - 1) |
- S_038004_TEX_DEPTH(depth - 1) |
- S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
- (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
- r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
- (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]);
- r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
- word4 |
- S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
- S_038010_REQUEST_SIZE(1) |
- S_038010_ENDIAN_SWAP(endian) |
- S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
- S_038014_LAST_LEVEL(state->u.tex.last_level) |
- S_038014_BASE_ARRAY(state->u.tex.first_layer) |
- S_038014_LAST_ARRAY(state->u.tex.last_layer), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
- S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE) |
- S_038018_MAX_ANISO(4 /* max 16 samples */), 0xFFFFFFFF, NULL);
+ rstate->bo[0] = bo[0];
+ rstate->bo[1] = bo[1];
+
+ rstate->val[0] = (S_038000_DIM(r600_tex_dim(texture->target)) |
+ S_038000_TILE_MODE(array_mode) |
+ S_038000_TILE_TYPE(tile_type) |
+ S_038000_PITCH((pitch / 8) - 1) |
+ S_038000_TEX_WIDTH(texture->width0 - 1));
+ rstate->val[1] = (S_038004_TEX_HEIGHT(height - 1) |
+ S_038004_TEX_DEPTH(depth - 1) |
+ S_038004_DATA_FORMAT(format));
+ rstate->val[2] = (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8;
+ rstate->val[3] = (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8;
+ rstate->val[4] = (word4 |
+ S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
+ S_038010_REQUEST_SIZE(1) |
+ S_038010_ENDIAN_SWAP(endian) |
+ S_038010_BASE_LEVEL(state->u.tex.first_level));
+ rstate->val[5] = (S_038014_LAST_LEVEL(state->u.tex.last_level) |
+ S_038014_BASE_ARRAY(state->u.tex.first_layer) |
+ S_038014_LAST_ARRAY(state->u.tex.last_layer));
+ rstate->val[6] = (S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE) |
+ S_038018_MAX_ANISO(4 /* max 16 samples */));
return &resource->base;
}
@@ -1486,37 +1482,27 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx)
}
void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
- struct r600_pipe_state *rstate,
- struct r600_resource *rbuffer,
- unsigned offset, unsigned stride)
+ struct r600_pipe_resource_state *rstate)
{
rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
- r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
- offset, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
- rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
- S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
- S_038008_STRIDE(stride), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
- 0xC0000000, 0xFFFFFFFF, NULL);
+
+ rstate->bo[0] = NULL;
+ rstate->val[0] = 0;
+ rstate->val[1] = 0;
+ rstate->val[2] = 0;
+ rstate->val[3] = 0;
+ rstate->val[4] = 0;
+ rstate->val[5] = 0;
+ rstate->val[6] = 0xc0000000;
}
-void r600_pipe_mod_buffer_resource(struct r600_pipe_state *rstate,
+void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
struct r600_resource *rbuffer,
unsigned offset, unsigned stride)
{
- rstate->nregs = 0;
- r600_pipe_state_mod_reg_bo(rstate, offset, rbuffer->bo);
- r600_pipe_state_mod_reg(rstate, rbuffer->bo_size - offset - 1);
- r600_pipe_state_mod_reg(rstate, S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
- S_038008_STRIDE(stride));
- rstate->nregs = 7;
+ rstate->val[0] = offset;
+ rstate->bo[0] = rbuffer->bo;
+ rstate->val[1] = rbuffer->bo_size - offset - 1;
+ rstate->val[2] = S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
+ S_038008_STRIDE(stride);
}
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 48ab15f9323..a670ac02be2 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -318,12 +318,10 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state)
static void r600_update_alpha_ref(struct r600_pipe_context *rctx)
{
- unsigned alpha_ref = rctx->alpha_ref;
+ unsigned alpha_ref;
struct r600_pipe_state rstate;
- if (!rctx->alpha_ref_dirty)
- return;
-
+ alpha_ref = rctx->alpha_ref;
rstate.nregs = 0;
if (rctx->export_16bpc)
alpha_ref &= ~0x1FFF;
@@ -388,7 +386,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_resource_buffer *rbuffer = r600_buffer(buffer);
- struct r600_pipe_state *rstate;
+ struct r600_pipe_resource_state *rstate;
uint32_t offset;
/* Note that the state tracker can unbind constant buffers by
@@ -416,9 +414,9 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
rstate = &rctx->vs_const_buffer_resource[index];
if (!rstate->id) {
if (rctx->family >= CHIP_CEDAR) {
- evergreen_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ evergreen_pipe_init_buffer_resource(rctx, rstate);
} else {
- r600_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ r600_pipe_init_buffer_resource(rctx, rstate);
}
}
@@ -444,9 +442,9 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
rstate = &rctx->ps_const_buffer_resource[index];
if (!rstate->id) {
if (rctx->family >= CHIP_CEDAR) {
- evergreen_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ evergreen_pipe_init_buffer_resource(rctx, rstate);
} else {
- r600_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ r600_pipe_init_buffer_resource(rctx, rstate);
}
}
if (rctx->family >= CHIP_CEDAR) {
@@ -468,7 +466,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
static void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
{
- struct r600_pipe_state *rstate;
+ struct r600_pipe_resource_state *rstate;
struct r600_resource *rbuffer;
struct pipe_vertex_buffer *vertex_buffer;
unsigned i, count, offset;
@@ -503,9 +501,9 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
if (!rstate->id) {
if (rctx->family >= CHIP_CEDAR) {
- evergreen_pipe_init_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride);
+ evergreen_pipe_init_buffer_resource(rctx, rstate);
} else {
- r600_pipe_init_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride);
+ r600_pipe_init_buffer_resource(rctx, rstate);
}
}
@@ -595,7 +593,8 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
return;
}
- r600_update_alpha_ref(rctx);
+ if (rctx->alpha_ref_dirty)
+ r600_update_alpha_ref(rctx);
mask = 0;
for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index bb72cf63bc5..ac2e65b988e 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -83,7 +83,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SM3:
return 1;
case PIPE_CAP_ANISOTROPIC_FILTER:
- return 0;
+ return 1;
case PIPE_CAP_POINT_SPRITE:
return 1;
case PIPE_CAP_MAX_RENDER_TARGETS:
@@ -163,7 +163,7 @@ softpipe_get_paramf(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_MAX_POINT_WIDTH_AA:
return 255.0; /* arbitrary */
case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
- return 16.0; /* not actually signficant at this time */
+ return 16.0;
case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
return 16.0; /* arbitrary */
default:
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 1446aee2aa4..90766f4119c 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1709,6 +1709,317 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler,
}
+/* For anisotropic filtering */
+#define WEIGHT_LUT_SIZE 1024
+
+static float *weightLut = NULL;
+
+/**
+ * Creates the look-up table used to speed-up EWA sampling
+ */
+static void
+create_filter_table(void)
+{
+ unsigned i;
+ if (!weightLut) {
+ weightLut = (float *) malloc(WEIGHT_LUT_SIZE * sizeof(float));
+
+ for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
+ float alpha = 2;
+ float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
+ float weight = (float) exp(-alpha * r2);
+ weightLut[i] = weight;
+ }
+ }
+}
+
+
+/**
+ * Elliptical weighted average (EWA) filter for producing high quality
+ * anisotropic filtered results.
+ * Based on the Higher Quality Elliptical Weighted Avarage Filter
+ * published by Paul S. Heckbert in his Master's Thesis
+ * "Fundamentals of Texture Mapping and Image Warping" (1989)
+ */
+static void
+img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ const float dudx, const float dvdx,
+ const float dudy, const float dvdy,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
+
+ unsigned level0 = samp->level > 0 ? samp->level : 0;
+ float scaling = 1.0 / (1 << level0);
+ int width = u_minify(texture->width0, level0);
+ int height = u_minify(texture->height0, level0);
+
+ float ux = dudx * scaling;
+ float vx = dvdx * scaling;
+ float uy = dudy * scaling;
+ float vy = dvdy * scaling;
+
+ /* compute ellipse coefficients to bound the region:
+ * A*x*x + B*x*y + C*y*y = F.
+ */
+ float A = vx*vx+vy*vy+1;
+ float B = -2*(ux*vx+uy*vy);
+ float C = ux*ux+uy*uy+1;
+ float F = A*C-B*B/4.0;
+
+ /* check if it is an ellipse */
+ /* ASSERT(F > 0.0); */
+
+ /* Compute the ellipse's (u,v) bounding box in texture space */
+ float d = -B*B+4.0*C*A;
+ float box_u = 2.0 / d * sqrt(d*C*F); /* box_u -> half of bbox with */
+ float box_v = 2.0 / d * sqrt(A*d*F); /* box_v -> half of bbox height */
+
+ float rgba_temp[NUM_CHANNELS][QUAD_SIZE];
+ float s_buffer[QUAD_SIZE];
+ float t_buffer[QUAD_SIZE];
+ float weight_buffer[QUAD_SIZE];
+ unsigned buffer_next;
+ int j;
+ float den;// = 0.0F;
+ float ddq;
+ float U;// = u0 - tex_u;
+ int v;
+
+ /* Scale ellipse formula to directly index the Filter Lookup Table.
+ * i.e. scale so that F = WEIGHT_LUT_SIZE-1
+ */
+ double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
+ A *= formScale;
+ B *= formScale;
+ C *= formScale;
+ /* F *= formScale; */ /* no need to scale F as we don't use it below here */
+
+ /* For each quad, the du and dx values are the same and so the ellipse is
+ * also the same. Note that texel/image access can only be performed using
+ * a quad, i.e. it is not possible to get the pixel value for a single
+ * tex coord. In order to have a better performance, the access is buffered
+ * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is full,
+ * then the pixel values are read from the image.
+ */
+ ddq = 2 * A;
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
+ * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
+ * value, q, is less than F, we're inside the ellipse
+ */
+ float tex_u=-0.5 + s[j] * texture->width0 * scaling;
+ float tex_v=-0.5 + t[j] * texture->height0 * scaling;
+
+ int u0 = floor(tex_u - box_u);
+ int u1 = ceil (tex_u + box_u);
+ int v0 = floor(tex_v - box_v);
+ int v1 = ceil (tex_v + box_v);
+
+ float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
+ buffer_next = 0;
+ den = 0;
+ U = u0 - tex_u;
+ for (v = v0; v <= v1; ++v) {
+ float V = v - tex_v;
+ float dq = A * (2 * U + 1) + B * V;
+ float q = (C * V + B * U) * V + A * U * U;
+
+ int u;
+ for (u = u0; u <= u1; ++u) {
+ /* Note that the ellipse has been pre-scaled so F = WEIGHT_LUT_SIZE - 1 */
+ if (q < WEIGHT_LUT_SIZE) {
+ /* as a LUT is used, q must never be negative;
+ * should not happen, though
+ */
+ const int qClamped = q >= 0.0F ? q : 0;
+ float weight = weightLut[qClamped];
+
+ weight_buffer[buffer_next] = weight;
+ s_buffer[buffer_next] = u / ((float) width);
+ t_buffer[buffer_next] = v / ((float) height);
+
+ buffer_next++;
+ if (buffer_next == QUAD_SIZE) {
+ /* 4 texel coords are in the buffer -> read it now */
+ int jj;
+ /* it is assumed that samp->min_img_filter is set to
+ * img_filter_2d_nearest or one of the
+ * accelerated img_filter_2d_nearest_XXX functions.
+ */
+ samp->min_img_filter(tgsi_sampler, s_buffer, t_buffer, p, NULL,
+ tgsi_sampler_lod_bias, rgba_temp);
+ for (jj = 0; jj < buffer_next; jj++) {
+ num[0] += weight_buffer[jj] * rgba_temp[0][jj];
+ num[1] += weight_buffer[jj] * rgba_temp[1][jj];
+ num[2] += weight_buffer[jj] * rgba_temp[2][jj];
+ num[3] += weight_buffer[jj] * rgba_temp[3][jj];
+ }
+
+ buffer_next = 0;
+ }
+
+ den += weight;
+ }
+ q += dq;
+ dq += ddq;
+ }
+ }
+
+ /* if the tex coord buffer contains unread values, we will read them now.
+ * Note that in most cases we have to read more pixel values than required,
+ * however, as the img_filter_2d_nearest function(s) does not have a count
+ * parameter, we need to read the whole quad and ignore the unused values
+ */
+ if (buffer_next > 0) {
+ int jj;
+ /* it is assumed that samp->min_img_filter is set to
+ * img_filter_2d_nearest or one of the
+ * accelerated img_filter_2d_nearest_XXX functions.
+ */
+ samp->min_img_filter(tgsi_sampler, s_buffer, t_buffer, p, NULL,
+ tgsi_sampler_lod_bias, rgba_temp);
+ for (jj = 0; jj < buffer_next; jj++) {
+ num[0] += weight_buffer[jj] * rgba_temp[0][jj];
+ num[1] += weight_buffer[jj] * rgba_temp[1][jj];
+ num[2] += weight_buffer[jj] * rgba_temp[2][jj];
+ num[3] += weight_buffer[jj] * rgba_temp[3][jj];
+ }
+ }
+
+ if (den <= 0.0F) {
+ /* Reaching this place would mean
+ * that no pixels intersected the ellipse.
+ * This should never happen because
+ * the filter we use always
+ * intersects at least one pixel.
+ */
+
+ /*rgba[0]=0;
+ rgba[1]=0;
+ rgba[2]=0;
+ rgba[3]=0;*/
+ /* not enough pixels in resampling, resort to direct interpolation */
+ samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba_temp);
+ den = 1;
+ num[0] = rgba_temp[0][j];
+ num[1] = rgba_temp[1][j];
+ num[2] = rgba_temp[2][j];
+ num[3] = rgba_temp[3][j];
+ }
+
+ rgba[0][j] = num[0] / den;
+ rgba[1][j] = num[1] / den;
+ rgba[2][j] = num[2] / den;
+ rgba[3][j] = num[3] / den;
+ }
+}
+
+
+/**
+ * Sample 2D texture using an anisotropic filter.
+ */
+static void
+mip_filter_linear_aniso(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
+ int level0;
+ float lambda;
+ float lod[QUAD_SIZE];
+
+ float s_to_u = u_minify(texture->width0, samp->view->u.tex.first_level);
+ float t_to_v = u_minify(texture->height0, samp->view->u.tex.first_level);
+ float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
+ float dudy = (s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
+ float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
+ float dvdy = (t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
+
+ if (control == tgsi_sampler_lod_bias) {
+ /* note: instead of working with Px and Py, we will use the
+ * squared length instead, to avoid sqrt.
+ */
+ float Px2 = dudx * dudx + dvdx * dvdx;
+ float Py2 = dudy * dudy + dvdy * dvdy;
+
+ float Pmax2;
+ float Pmin2;
+ float e;
+ const float maxEccentricity = samp->sampler->max_anisotropy * samp->sampler->max_anisotropy;
+
+ if (Px2 < Py2) {
+ Pmax2 = Py2;
+ Pmin2 = Px2;
+ }
+ else {
+ Pmax2 = Px2;
+ Pmin2 = Py2;
+ }
+
+ /* if the eccentricity of the ellipse is too big, scale up the shorter
+ * of the two vectors to limit the maximum amount of work per pixel
+ */
+ e = Pmax2 / Pmin2;
+ if (e > maxEccentricity) {
+ /* float s=e / maxEccentricity;
+ minor[0] *= s;
+ minor[1] *= s;
+ Pmin2 *= s; */
+ Pmin2 = Pmax2 / maxEccentricity;
+ }
+
+ /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid
+ * this since 0.5*log(x) = log(sqrt(x))
+ */
+ lambda = 0.5 * util_fast_log2(Pmin2) + samp->sampler->lod_bias;
+ compute_lod(samp->sampler, lambda, c0, lod);
+ }
+ else {
+ assert(control == tgsi_sampler_lod_explicit);
+
+ memcpy(lod, c0, sizeof(lod));
+ }
+
+ /* XXX: Take into account all lod values.
+ */
+ lambda = lod[0];
+ level0 = samp->view->u.tex.first_level + (int)lambda;
+
+ /* If the ellipse covers the whole image, we can
+ * simply return the average of the whole image.
+ */
+ if (level0 >= texture->last_level) {
+ samp->level = texture->last_level;
+ samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
+ }
+ else {
+ /* don't bother interpolating between multiple LODs; it doesn't
+ * seem to be worth the extra running time.
+ */
+ samp->level = level0;
+ img_filter_2d_ewa(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias,
+ dudx, dvdx, dudy, dvdy, rgba);
+ }
+
+ if (DEBUG_TEX) {
+ print_sample(__FUNCTION__, rgba);
+ }
+}
+
+
/**
* Specialized version of mip_filter_linear with hard-wired calls to
@@ -2316,14 +2627,33 @@ sp_create_sampler_variant( const struct pipe_sampler_state *sampler,
sampler->normalized_coords &&
sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
- sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR)
- {
+ sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR) {
samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT;
}
- else
- {
+ else {
samp->mip_filter = mip_filter_linear;
}
+
+ /* Anisotropic filtering extension. */
+ if (sampler->max_anisotropy > 1) {
+ samp->mip_filter = mip_filter_linear_aniso;
+
+ /* Override min_img_filter:
+ * min_img_filter needs to be set to NEAREST since we need to access
+ * each texture pixel as it is and weight it later; using linear
+ * filters will have incorrect results.
+ * By setting the filter to NEAREST here, we can avoid calling the
+ * generic img_filter_2d_nearest in the anisotropic filter function,
+ * making it possible to use one of the accelerated implementations
+ */
+ samp->min_img_filter = get_img_filter(key, PIPE_TEX_FILTER_NEAREST, sampler);
+
+ /* on first access create the lookup table containing the filter weights. */
+ if (!weightLut) {
+ create_filter_table();
+ }
+ }
+
break;
}
diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
index 1ed1d5d25bb..ebcd4bcaf10 100644
--- a/src/gallium/drivers/svga/svga_cmd.c
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -46,10 +46,10 @@
* to have allocated the fifo space before converting.
*
* Results:
- * id is filld out.
+ * id is filled out.
*
* Side effects:
- * One surface relocation is preformed for texture handle.
+ * One surface relocation is performed for texture handle.
*
*----------------------------------------------------------------------
*/
@@ -224,7 +224,7 @@ SVGA3D_DestroyContext(struct svga_winsys_context *swc) // IN
* containers for host VRAM objects like textures, vertex
* buffers, and depth/stencil buffers.
*
- * Surfaces are hierarchial:
+ * Surfaces are hierarchical:
*
* - Surface may have multiple faces (for cube maps)
*
@@ -376,11 +376,9 @@ SVGA3D_DestroySurface(struct svga_winsys_context *swc,
/*
*----------------------------------------------------------------------
*
- * SVGA3D_BeginSurfaceDMA--
+ * SVGA3D_SurfaceDMA--
*
- * Begin a SURFACE_DMA command. This reserves space for it in
- * the FIFO, and returns a pointer to the command's box array.
- * This function must be paired with SVGA_FIFOCommitAll().
+ * Emit a SURFACE_DMA command.
*
* When the SVGA3D device asynchronously processes this FIFO
* command, a DMA operation is performed between host VRAM and