diff options
Diffstat (limited to 'src/gallium/drivers')
25 files changed, 869 insertions, 690 deletions
diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h index 78554034781..14ae749c828 100644 --- a/src/gallium/drivers/i915/i915_batchbuffer.h +++ b/src/gallium/drivers/i915/i915_batchbuffer.h @@ -95,7 +95,7 @@ static INLINE int i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch, struct i915_winsys_buffer *buffer, enum i915_winsys_buffer_usage usage, - size_t offset, bool fenced) + size_t offset, boolean fenced) { return batch->iws->batchbuffer_reloc(batch, buffer, usage, offset, fenced); } diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index dacf50e870d..964948edc0e 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -124,6 +124,12 @@ struct i915_fragment_shader * Else, the bitmask indicates which components are occupied by immediates. */ ubyte constant_flags[I915_MAX_CONSTANT]; + + /** + * The mapping between generics and hw texture coords. + * We need to share this between the vertex and fragment stages. + **/ + int generic_mapping[I915_TEX_UNITS]; }; diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index b145b58be30..27f100843bf 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -133,7 +133,21 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...) p->error = 1; } - +static uint get_mapping(struct i915_fragment_shader* fs, int unit) +{ + int i; + for (i = 0; i < I915_TEX_UNITS; i++) + { + if (fs->generic_mapping[i] == -1) { + fs->generic_mapping[i] = unit; + return i; + } + if (fs->generic_mapping[i] == unit) + return i; + } + debug_printf("Exceeded max generics\n"); + return 0; +} /** * Construct a ureg for the given source register. Will emit @@ -141,7 +155,8 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...) 
*/ static uint src_vector(struct i915_fp_compile *p, - const struct tgsi_full_src_register *source) + const struct tgsi_full_src_register *source, + struct i915_fragment_shader* fs) { uint index = source->Register.Index; uint src = 0, sem_name, sem_ind; @@ -192,9 +207,11 @@ src_vector(struct i915_fp_compile *p, src = swizzle(src, W, W, W, W); break; case TGSI_SEMANTIC_GENERIC: - /* usually a texcoord */ - src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL); - break; + { + int real_tex_unit = get_mapping(fs, sem_ind); + src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); + break; + } default: i915_program_error(p, "Bad source->Index"); return 0; @@ -336,13 +353,14 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex) static void emit_tex(struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, - uint opcode) + uint opcode, + struct i915_fragment_shader* fs) { uint texture = inst->Texture.Texture; uint unit = inst->Src[1].Register.Index; uint tex = translate_tex_src_target( p, texture ); uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); - uint coord = src_vector( p, &inst->Src[0]); + uint coord = src_vector( p, &inst->Src[0], fs); i915_emit_texld( p, get_result_vector( p, &inst->Dst[0] ), @@ -361,15 +379,16 @@ emit_tex(struct i915_fp_compile *p, static void emit_simple_arith(struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, - uint opcode, uint numArgs) + uint opcode, uint numArgs, + struct i915_fragment_shader* fs) { uint arg1, arg2, arg3; assert(numArgs <= 3); - arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0] ); - arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1] ); - arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2] ); + arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs ); + arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs ); + arg3 = (numArgs < 3) ? 
0 : src_vector( p, &inst->Src[2], fs ); i915_emit_arith( p, opcode, @@ -385,7 +404,8 @@ emit_simple_arith(struct i915_fp_compile *p, static void emit_simple_arith_swap2(struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, - uint opcode, uint numArgs) + uint opcode, uint numArgs, + struct i915_fragment_shader* fs) { struct tgsi_full_instruction inst2; @@ -396,7 +416,7 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, inst2.Src[0] = inst->Src[1]; inst2.Src[1] = inst->Src[0]; - emit_simple_arith(p, &inst2, opcode, numArgs); + emit_simple_arith(p, &inst2, opcode, numArgs, fs); } @@ -415,7 +435,8 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, */ static void i915_translate_instruction(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst) + const struct tgsi_full_instruction *inst, + struct i915_fragment_shader *fs) { uint writemask; uint src0, src1, src2, flags; @@ -423,7 +444,7 @@ i915_translate_instruction(struct i915_fp_compile *p, switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ABS: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); i915_emit_arith(p, A0_MAX, get_result_vector(p, &inst->Dst[0]), @@ -432,13 +453,13 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_ADD: - emit_simple_arith(p, inst, A0_ADD, 2); + emit_simple_arith(p, inst, A0_ADD, 2, fs); break; case TGSI_OPCODE_CMP: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); - src2 = src_vector(p, &inst->Src[2]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); + src2 = src_vector(p, &inst->Src[2], fs); i915_emit_arith(p, A0_CMP, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), @@ -446,7 +467,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_COS: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -495,17 +516,28 @@ 
i915_translate_instruction(struct i915_fp_compile *p, i915_emit_const4fv(p, cos_constants), 0); break; + case TGSI_OPCODE_DP2: + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); + + i915_emit_arith(p, + A0_DP3, + get_result_vector(p, &inst->Dst[0]), + get_result_flags(inst), 0, + swizzle(src0, X, Y, ZERO, ZERO), src1, 0); + break; + case TGSI_OPCODE_DP3: - emit_simple_arith(p, inst, A0_DP3, 2); + emit_simple_arith(p, inst, A0_DP3, 2, fs); break; case TGSI_OPCODE_DP4: - emit_simple_arith(p, inst, A0_DP4, 2); + emit_simple_arith(p, inst, A0_DP4, 2, fs); break; case TGSI_OPCODE_DPH: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); i915_emit_arith(p, A0_DP4, @@ -515,8 +547,8 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_DST: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); /* result[0] = 1 * 1; * result[1] = a[1] * b[1]; @@ -536,7 +568,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_EX2: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); i915_emit_arith(p, A0_EXP, @@ -546,16 +578,16 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_FLR: - emit_simple_arith(p, inst, A0_FLR, 1); + emit_simple_arith(p, inst, A0_FLR, 1, fs); break; case TGSI_OPCODE_FRC: - emit_simple_arith(p, inst, A0_FRC, 1); + emit_simple_arith(p, inst, A0_FRC, 1, fs); break; case TGSI_OPCODE_KIL: /* kill if src[0].x < 0 || src[0].y < 0 ... 
*/ - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); i915_emit_texld(p, @@ -571,7 +603,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_LG2: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); i915_emit_arith(p, A0_LOG, @@ -581,7 +613,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_LIT: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); /* tmp = max( a.xyzw, a.00zw ) @@ -614,9 +646,9 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_LRP: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); - src2 = src_vector(p, &inst->Src[2]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); + src2 = src_vector(p, &inst->Src[2], fs); flags = get_result_flags(inst); tmp = i915_get_utemp(p); @@ -636,16 +668,16 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_MAD: - emit_simple_arith(p, inst, A0_MAD, 3); + emit_simple_arith(p, inst, A0_MAD, 3, fs); break; case TGSI_OPCODE_MAX: - emit_simple_arith(p, inst, A0_MAX, 2); + emit_simple_arith(p, inst, A0_MAX, 2, fs); break; case TGSI_OPCODE_MIN: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); tmp = i915_get_utemp(p); flags = get_result_flags(inst); @@ -662,16 +694,16 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_MOV: - emit_simple_arith(p, inst, A0_MOV, 1); + emit_simple_arith(p, inst, A0_MOV, 1, fs); break; case TGSI_OPCODE_MUL: - emit_simple_arith(p, inst, A0_MUL, 2); + emit_simple_arith(p, inst, A0_MUL, 2, fs); break; case TGSI_OPCODE_POW: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); 
+ src1 = src_vector(p, &inst->Src[1], fs); tmp = i915_get_utemp(p); flags = get_result_flags(inst); @@ -695,17 +727,17 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_RCP: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); i915_emit_arith(p, A0_RCP, get_result_vector(p, &inst->Dst[0]), - get_result_flags(inst), 0, + get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; case TGSI_OPCODE_RSQ: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); i915_emit_arith(p, A0_RSQ, @@ -715,7 +747,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_SCS: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); /* @@ -778,17 +810,40 @@ i915_translate_instruction(struct i915_fp_compile *p, } break; - case TGSI_OPCODE_SGE: - emit_simple_arith(p, inst, A0_SGE, 2); + case TGSI_OPCODE_SEQ: + /* if we're both >= and <= then we're == */ + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_SGE, + tmp, A0_DEST_CHANNEL_ALL, 0, + src0, + src1, 0); + + i915_emit_arith(p, + A0_SGE, + get_result_vector(p, &inst->Dst[0]), + A0_DEST_CHANNEL_ALL, 0, + src1, + src0, 0); + + i915_emit_arith(p, + A0_MUL, + get_result_vector(p, &inst->Dst[0]), + A0_DEST_CHANNEL_ALL, 0, + get_result_vector(p, &inst->Dst[0]), + tmp, 0); + break; - case TGSI_OPCODE_SLE: - /* like SGE, but swap reg0, reg1 */ - emit_simple_arith_swap2(p, inst, A0_SGE, 2); + case TGSI_OPCODE_SGE: + emit_simple_arith(p, inst, A0_SGE, 2, fs); break; case TGSI_OPCODE_SIN: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -837,18 +892,78 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_const4fv(p, sin_constants), 0); break; + case TGSI_OPCODE_SLE: + /* like SGE, but swap reg0, 
reg1 */ + emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs); + break; + case TGSI_OPCODE_SLT: - emit_simple_arith(p, inst, A0_SLT, 2); + emit_simple_arith(p, inst, A0_SLT, 2, fs); break; case TGSI_OPCODE_SGT: /* like SLT, but swap reg0, reg1 */ - emit_simple_arith_swap2(p, inst, A0_SLT, 2); + emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs); + break; + + case TGSI_OPCODE_SNE: + /* if we're either < or > then we're != */ + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_SLT, + tmp, + A0_DEST_CHANNEL_ALL, 0, + src0, + src1, 0); + + i915_emit_arith(p, + A0_SLT, + get_result_vector(p, &inst->Dst[0]), + A0_DEST_CHANNEL_ALL, 0, + src1, + src0, 0); + + i915_emit_arith(p, + A0_ADD, + get_result_vector(p, &inst->Dst[0]), + A0_DEST_CHANNEL_ALL, 0, + get_result_vector(p, &inst->Dst[0]), + tmp, 0); + break; + + case TGSI_OPCODE_SSG: + /* compute (src>0) - (src<0) */ + src0 = src_vector(p, &inst->Src[0], fs); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_SLT, + tmp, + A0_DEST_CHANNEL_ALL, 0, + src0, + swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0); + + i915_emit_arith(p, + A0_SLT, + get_result_vector(p, &inst->Dst[0]), + A0_DEST_CHANNEL_ALL, 0, + swizzle(src0, ZERO, ZERO, ZERO, ZERO), + src0, 0); + + i915_emit_arith(p, + A0_ADD, + get_result_vector(p, &inst->Dst[0]), + A0_DEST_CHANNEL_ALL, 0, + get_result_vector(p, &inst->Dst[0]), + negate(tmp, 1, 1, 1, 1), 0); break; case TGSI_OPCODE_SUB: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); i915_emit_arith(p, A0_ADD, @@ -858,15 +973,19 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_TEX: - emit_tex(p, inst, T0_TEXLD); + emit_tex(p, inst, T0_TEXLD, fs); + break; + + case TGSI_OPCODE_TRUNC: + emit_simple_arith(p, inst, A0_TRC, 1, fs); break; case TGSI_OPCODE_TXB: - emit_tex(p, inst, T0_TEXLDB);
+ emit_tex(p, inst, T0_TEXLDB, fs); break; case TGSI_OPCODE_TXP: - emit_tex(p, inst, T0_TEXLDP); + emit_tex(p, inst, T0_TEXLDP, fs); break; case TGSI_OPCODE_XPD: @@ -876,8 +995,8 @@ i915_translate_instruction(struct i915_fp_compile *p, * result.z = src0.x * src1.y - src0.y * src1.x; * result.w = undef; */ - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -912,7 +1031,8 @@ i915_translate_instruction(struct i915_fp_compile *p, */ static void i915_translate_instructions(struct i915_fp_compile *p, - const struct tgsi_token *tokens) + const struct tgsi_token *tokens, + struct i915_fragment_shader *fs) { struct i915_fragment_shader *ifs = p->shader; struct tgsi_parse_context parse; @@ -993,7 +1113,7 @@ i915_translate_instructions(struct i915_fp_compile *p, p->first_instruction = FALSE; } - i915_translate_instruction(p, &parse.FullToken.FullInstruction); + i915_translate_instruction(p, &parse.FullToken.FullInstruction, fs); break; default: @@ -1011,6 +1131,7 @@ i915_init_compile(struct i915_context *i915, struct i915_fragment_shader *ifs) { struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); + int i; p->shader = ifs; @@ -1023,6 +1144,9 @@ i915_init_compile(struct i915_context *i915, ifs->num_constants = 0; memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); + for (i = 0; i < I915_TEX_UNITS; i++) + ifs->generic_mapping[i] = -1; + p->first_instruction = TRUE; p->nr_tex_indirect = 1; /* correct? 
*/ @@ -1192,7 +1316,7 @@ i915_translate_fragment_program( struct i915_context *i915, p = i915_init_compile(i915, fs); i915_find_wpos_space(p); - i915_translate_instructions(p, tokens); + i915_translate_instructions(p, tokens, fs); i915_fixup_depth_write(p); i915_fini_compile(i915, p); diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index e05b059706d..b74b19d0fe4 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -700,7 +700,8 @@ i915_texture_destroy(struct pipe_screen *screen, struct i915_winsys *iws = i915_screen(screen)->iws; uint i; - iws->buffer_destroy(iws, tex->buffer); + if (tex->buffer) + iws->buffer_destroy(iws, tex->buffer); for (i = 0; i < Elements(tex->image_offset); i++) if (tex->image_offset[i]) diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index da96b420f2c..c86baa58b28 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -134,6 +134,11 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_TIMER_QUERY: + case PIPE_CAP_SM3: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: return 0; /* Features we can lie about (boolean caps). 
*/ diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 59ac2f7292a..bf6b30a4530 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -35,6 +35,18 @@ #include "i915_debug.h" #include "i915_reg.h" +static uint find_mapping(struct i915_fragment_shader* fs, int unit) +{ + int i; + for (i = 0; i < I915_TEX_UNITS ; i++) + { + if (fs->generic_mapping[i] == unit) + return i; + } + debug_printf("Mapping not found\n"); + return 0; +} + /*********************************************************************** @@ -46,7 +58,7 @@ static void calculate_vertex_layout(struct i915_context *i915) const struct i915_fragment_shader *fs = i915->fs; const enum interp_mode colorInterp = i915->rasterizer->color_interp; struct vertex_info vinfo; - boolean texCoords[8], colors[2], fog, needW; + boolean texCoords[I915_TEX_UNITS], colors[2], fog, needW; uint i; int src; @@ -66,11 +78,12 @@ static void calculate_vertex_layout(struct i915_context *i915) colors[fs->info.input_semantic_index[i]] = TRUE; break; case TGSI_SEMANTIC_GENERIC: - /* usually a texcoord */ { - const uint unit = fs->info.input_semantic_index[i]; - assert(unit < 8); - texCoords[unit] = TRUE; + /* texcoords/varyings/other generic */ + /* XXX handle back/front face and point size */ + uint unit = fs->info.input_semantic_index[i]; + + texCoords[find_mapping(fs, unit)] = TRUE; needW = TRUE; } break; @@ -82,7 +95,7 @@ static void calculate_vertex_layout(struct i915_context *i915) } } - + /* pos */ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); if (needW) { @@ -120,12 +133,12 @@ static void calculate_vertex_layout(struct i915_context *i915) vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM; } - /* texcoords */ - for (i = 0; i < 8; i++) { + /* texcoords/varyings */ + for (i = 0; i < I915_TEX_UNITS; i++) { uint hwtc; if (texCoords[i]) { hwtc = TEXCOORDFMT_4D; - src = draw_find_shader_output(i915->draw, 
TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, fs->generic_mapping[i]); draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); } else { diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 4068bed393c..ba9705bebee 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -71,7 +71,7 @@ lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxil LDFLAGS += $(LLVM_LDFLAGS) LIBS += -L../../auxiliary/ -lgallium libllvmpipe.a $(LLVM_LIBS) $(GL_LIB_DEPS) -LD=g++ +LD=$(CXX) $(PROGS): lp_test_main.o libllvmpipe.a diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 8d75dd07c4d..fb125f3a8d8 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -508,7 +508,7 @@ nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) { unsigned s, i; - for (s = 0; s < 5; ++s) + for (s = 0; s < 3; ++s) for (i = 0; i < nv50_context(pipe)->num_samplers[s]; ++i) if (nv50_context(pipe)->samplers[s][i] == hwcso) nv50_context(pipe)->samplers[s][i] = NULL; diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index ced26494e15..339906e6a63 100644 --- a/src/gallium/drivers/nvfx/nvfx_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -84,7 +84,7 @@ nvfx_region_set_format(struct nv04_region* rgn, enum pipe_format format) } static INLINE void -nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, bool for_write) +nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, boolean for_write) { rgn->x = x; rgn->y = y; @@ -120,7 +120,7 @@ nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, } static INLINE void -nvfx_region_init_for_subresource(struct nv04_region* rgn, struct 
pipe_resource* pt, unsigned level, unsigned x, unsigned y, unsigned z, bool for_write) +nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, unsigned level, unsigned x, unsigned y, unsigned z, boolean for_write) { if(pt->target != PIPE_BUFFER) { diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c index 2debcb6eb8f..cc4b51ec1f8 100644 --- a/src/gallium/drivers/nvfx/nvfx_transfer.c +++ b/src/gallium/drivers/nvfx/nvfx_transfer.c @@ -56,7 +56,7 @@ nvfx_transfer_new(struct pipe_context *pipe, else { struct nvfx_staging_transfer* tx; - bool direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR; + boolean direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR; tx = CALLOC_STRUCT(nvfx_staging_transfer); if(!tx) diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 37b0f01cfd3..dfedf353877 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -30,7 +30,8 @@ C_SOURCES = \ r300_transfer.c LIBRARY_INCLUDES = \ - -I$(TOP)/src/mesa/drivers/dri/r300/compiler + -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ + -I$(TOP)/include COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 4949703120d..571986c3011 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2011 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -32,392 +33,109 @@ /* Parse a PCI ID and fill an r300_capabilities struct with information. 
*/ void r300_parse_chipset(struct r300_capabilities* caps) { - /* Reasonable defaults */ - caps->num_vert_fpus = 2; - caps->num_tex_units = 16; - caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; - caps->hiz_ram = 0; - caps->is_r400 = FALSE; - caps->is_r500 = FALSE; - caps->high_second_pipe = FALSE; - - /* Note: These are not ordered by PCI ID. I leave that task to GCC, - * which will perform the ordering while collating jump tables. Instead, - * I've tried to group them according to capabilities and age. */ switch (caps->pci_id) { - case 0x4144: - caps->family = CHIP_FAMILY_R300; - caps->high_second_pipe = TRUE; - caps->num_vert_fpus = 4; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x4145: - case 0x4146: - case 0x4147: - case 0x4E44: - case 0x4E45: - case 0x4E46: - case 0x4E47: - caps->family = CHIP_FAMILY_R300; - caps->high_second_pipe = TRUE; - caps->num_vert_fpus = 4; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x4150: - case 0x4151: - case 0x4152: - case 0x4153: - case 0x4154: - case 0x4155: - case 0x4156: - case 0x4E50: - case 0x4E51: - case 0x4E52: - case 0x4E53: - case 0x4E54: - case 0x4E56: - caps->family = CHIP_FAMILY_RV350; - caps->high_second_pipe = TRUE; - caps->zmask_ram = RV3xx_ZMASK_SIZE; - break; - - case 0x4148: - case 0x4149: - case 0x414A: - case 0x414B: - case 0x4E48: - case 0x4E49: - case 0x4E4B: - caps->family = CHIP_FAMILY_R350; - caps->high_second_pipe = TRUE; - caps->num_vert_fpus = 4; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x4E4A: - caps->family = CHIP_FAMILY_R360; - caps->high_second_pipe = TRUE; - caps->num_vert_fpus = 4; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x5460: - case 0x5462: - case 0x5464: - case 0x5B60: - case 0x5B62: - case 0x5B63: - case 0x5B64: - case 0x5B65: - caps->family = CHIP_FAMILY_RV370; - 
caps->high_second_pipe = TRUE; - caps->zmask_ram = RV3xx_ZMASK_SIZE; - break; - - case 0x3150: - case 0x3152: - case 0x3154: - case 0x3155: - case 0x3E50: - case 0x3E54: - caps->family = CHIP_FAMILY_RV380; - caps->high_second_pipe = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = RV3xx_ZMASK_SIZE; - break; - - case 0x4A48: - case 0x4A49: - case 0x4A4A: - case 0x4A4B: - case 0x4A4C: - case 0x4A4D: - case 0x4A4E: - case 0x4A4F: - case 0x4A50: - case 0x4A54: - caps->family = CHIP_FAMILY_R420; - caps->num_vert_fpus = 6; - caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x5548: - case 0x5549: - case 0x554A: - case 0x554B: - case 0x5550: - case 0x5551: - case 0x5552: - case 0x5554: - case 0x5D57: - caps->family = CHIP_FAMILY_R423; - caps->num_vert_fpus = 6; - caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x554C: - case 0x554D: - case 0x554E: - case 0x554F: - case 0x5D48: - case 0x5D49: - case 0x5D4A: - caps->family = CHIP_FAMILY_R430; - caps->num_vert_fpus = 6; - caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x5D4C: - case 0x5D4D: - case 0x5D4E: - case 0x5D4F: - case 0x5D50: - case 0x5D52: - caps->family = CHIP_FAMILY_R480; - caps->num_vert_fpus = 6; - caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x4B48: - case 0x4B49: - case 0x4B4A: - case 0x4B4B: - case 0x4B4C: - caps->family = CHIP_FAMILY_R481; - caps->num_vert_fpus = 6; - caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x5E4C: - case 0x5E4F: - case 0x564A: - case 0x564B: - case 0x564F: - case 0x5652: - case 0x5653: - case 0x5657: - case 0x5E48: - case 0x5E4A: - case 0x5E4B: - case 0x5E4D: - caps->family = CHIP_FAMILY_RV410; - caps->num_vert_fpus = 6; - caps->is_r400 = TRUE; - caps->hiz_ram 
= R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x5954: - case 0x5955: - caps->family = CHIP_FAMILY_RS480; - caps->has_tcl = FALSE; - caps->zmask_ram = RV3xx_ZMASK_SIZE; +#define CHIPSET(pci_id, name, chipfamily) \ + case pci_id: \ + caps->family = CHIP_FAMILY_##chipfamily; \ break; +#include "pci_ids/r300_pci_ids.h" +#undef CHIPSET - case 0x5974: - case 0x5975: - caps->family = CHIP_FAMILY_RS482; - caps->has_tcl = FALSE; - caps->zmask_ram = RV3xx_ZMASK_SIZE; - break; - - case 0x5A41: - case 0x5A42: - caps->family = CHIP_FAMILY_RS400; - caps->has_tcl = FALSE; - break; - - case 0x5A61: - case 0x5A62: - caps->family = CHIP_FAMILY_RC410; - caps->has_tcl = FALSE; - caps->zmask_ram = RV3xx_ZMASK_SIZE; - break; - - case 0x791E: - case 0x791F: - caps->family = CHIP_FAMILY_RS690; - caps->has_tcl = FALSE; - caps->is_r400 = TRUE; - break; - - case 0x793F: - case 0x7941: - case 0x7942: - caps->family = CHIP_FAMILY_RS600; - caps->has_tcl = FALSE; - caps->is_r400 = TRUE; - break; - - case 0x796C: - case 0x796D: - case 0x796E: - case 0x796F: - caps->family = CHIP_FAMILY_RS740; - caps->has_tcl = FALSE; - caps->is_r400 = TRUE; - break; - - case 0x7100: - case 0x7101: - case 0x7102: - case 0x7103: - case 0x7104: - case 0x7105: - case 0x7106: - case 0x7108: - case 0x7109: - case 0x710A: - case 0x710B: - case 0x710C: - case 0x710E: - case 0x710F: - caps->family = CHIP_FAMILY_R520; - caps->num_vert_fpus = 8; - caps->is_r500 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x7140: - case 0x7141: - case 0x7142: - case 0x7143: - case 0x7144: - case 0x7145: - case 0x7146: - case 0x7147: - case 0x7149: - case 0x714A: - case 0x714B: - case 0x714C: - case 0x714D: - case 0x714E: - case 0x714F: - case 0x7151: - case 0x7152: - case 0x7153: - case 0x715E: - case 0x715F: - case 0x7180: - case 0x7181: - case 0x7183: - case 0x7186: - case 0x7187: - case 0x7188: - case 0x718A: - case 0x718B: - case 0x718C: - case 0x718D: - case 
0x718F: - case 0x7193: - case 0x7196: - case 0x719B: - case 0x719F: - case 0x7200: - case 0x7210: - case 0x7211: - caps->family = CHIP_FAMILY_RV515; - caps->num_vert_fpus = 2; - caps->is_r500 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x71C0: - case 0x71C1: - case 0x71C2: - case 0x71C3: - case 0x71C4: - case 0x71C5: - case 0x71C6: - case 0x71C7: - case 0x71CD: - case 0x71CE: - case 0x71D2: - case 0x71D4: - case 0x71D5: - case 0x71D6: - case 0x71DA: - case 0x71DE: - caps->family = CHIP_FAMILY_RV530; - caps->num_vert_fpus = 5; - caps->is_r500 = TRUE; - caps->hiz_ram = RV530_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x7240: - case 0x7243: - case 0x7244: - case 0x7245: - case 0x7246: - case 0x7247: - case 0x7248: - case 0x7249: - case 0x724A: - case 0x724B: - case 0x724C: - case 0x724D: - case 0x724E: - case 0x724F: - case 0x7284: - caps->family = CHIP_FAMILY_R580; - caps->num_vert_fpus = 8; - caps->is_r500 = TRUE; - caps->hiz_ram = RV530_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x7280: - caps->family = CHIP_FAMILY_RV570; - caps->num_vert_fpus = 8; - caps->is_r500 = TRUE; - caps->hiz_ram = RV530_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x7281: - case 0x7283: - case 0x7287: - case 0x7288: - case 0x7289: - case 0x728B: - case 0x728C: - case 0x7290: - case 0x7291: - case 0x7293: - case 0x7297: - caps->family = CHIP_FAMILY_RV560; - caps->num_vert_fpus = 8; - caps->is_r500 = TRUE; - caps->hiz_ram = RV530_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; + default: + fprintf(stderr, "r300: Warning: Unknown chipset 0x%x\nAborting...", + caps->pci_id); + abort(); + } - default: - fprintf(stderr, "r300: Warning: Unknown chipset 0x%x\n", - caps->pci_id); + /* Defaults. 
*/ + caps->high_second_pipe = FALSE; + caps->num_vert_fpus = 0; + caps->hiz_ram = 0; + caps->zmask_ram = 0; + + + switch (caps->family) { + case CHIP_FAMILY_R300: + case CHIP_FAMILY_R350: + caps->high_second_pipe = TRUE; + caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; + break; + + case CHIP_FAMILY_RV350: + case CHIP_FAMILY_RV370: + caps->high_second_pipe = TRUE; + caps->num_vert_fpus = 2; + caps->zmask_ram = RV3xx_ZMASK_SIZE; + break; + + case CHIP_FAMILY_RV380: + caps->high_second_pipe = TRUE; + caps->num_vert_fpus = 2; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = RV3xx_ZMASK_SIZE; + break; + + case CHIP_FAMILY_RS400: + case CHIP_FAMILY_RS600: + case CHIP_FAMILY_RS690: + case CHIP_FAMILY_RS740: + break; + + case CHIP_FAMILY_RC410: + case CHIP_FAMILY_RS480: + caps->zmask_ram = RV3xx_ZMASK_SIZE; + break; + + case CHIP_FAMILY_R420: + case CHIP_FAMILY_R423: + case CHIP_FAMILY_R430: + case CHIP_FAMILY_R480: + case CHIP_FAMILY_R481: + case CHIP_FAMILY_RV410: + caps->num_vert_fpus = 6; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; + break; + + case CHIP_FAMILY_R520: + caps->num_vert_fpus = 8; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; + break; + + case CHIP_FAMILY_RV515: + caps->num_vert_fpus = 2; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; + break; + + case CHIP_FAMILY_RV530: + caps->num_vert_fpus = 5; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; + break; + + case CHIP_FAMILY_R580: + case CHIP_FAMILY_RV560: + case CHIP_FAMILY_RV570: + caps->num_vert_fpus = 8; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; + break; } + caps->num_tex_units = 16; + caps->is_r400 = caps->family >= CHIP_FAMILY_R420 && caps->family < CHIP_FAMILY_RV515; + caps->is_r500 = caps->family >= CHIP_FAMILY_RV515; caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350; caps->z_compress = caps->is_rv350 ? 
R300_ZCOMP_8X8 : R300_ZCOMP_4X4; caps->dxtc_swizzle = caps->is_r400 || caps->is_r500; caps->has_us_format = caps->family == CHIP_FAMILY_R520; + caps->has_tcl = caps->num_vert_fpus > 0; + + if (caps->has_tcl) { + caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; + } } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index d0050bed2e8..4df6b5b6292 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -96,26 +96,24 @@ struct r300_capabilities { /* Enumerations for legibility and telling which card we're running on. */ enum { - CHIP_FAMILY_R300 = 0, + CHIP_FAMILY_R300 = 0, /* R3xx-based cores. */ CHIP_FAMILY_R350, - CHIP_FAMILY_R360, CHIP_FAMILY_RV350, CHIP_FAMILY_RV370, CHIP_FAMILY_RV380, - CHIP_FAMILY_R420, + CHIP_FAMILY_RS400, + CHIP_FAMILY_RC410, + CHIP_FAMILY_RS480, + CHIP_FAMILY_R420, /* R4xx-based cores. */ CHIP_FAMILY_R423, CHIP_FAMILY_R430, CHIP_FAMILY_R480, CHIP_FAMILY_R481, CHIP_FAMILY_RV410, - CHIP_FAMILY_RS400, - CHIP_FAMILY_RC410, - CHIP_FAMILY_RS480, - CHIP_FAMILY_RS482, CHIP_FAMILY_RS600, CHIP_FAMILY_RS690, CHIP_FAMILY_RS740, - CHIP_FAMILY_RV515, + CHIP_FAMILY_RV515, /* R5xx-based cores. 
*/ CHIP_FAMILY_R520, CHIP_FAMILY_RV530, CHIP_FAMILY_R580, diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 240b841ed2a..05af2148b38 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -49,20 +49,18 @@ static const char* r300_get_vendor(struct pipe_screen* pscreen) static const char* chip_families[] = { "ATI R300", "ATI R350", - "ATI R360", "ATI RV350", "ATI RV370", "ATI RV380", + "ATI RS400", + "ATI RC410", + "ATI RS480", "ATI R420", "ATI R423", "ATI R430", "ATI R480", "ATI R481", "ATI RV410", - "ATI RS400", - "ATI RC410", - "ATI RS480", - "ATI RS482", "ATI RS600", "ATI RS690", "ATI RS740", diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 04499c78cc6..121409b2260 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -438,7 +438,7 @@ static void r300_update_rs_block(struct r300_context *r300) /* Rasterize texture coordinates. 
*/ for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) { - bool sprite_coord = false; + boolean sprite_coord = false; if (fs_inputs->generic[i] != ATTR_UNUSED) { sprite_coord = !!(r300->sprite_coord_enable & (1 << i)); diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 54f5410c324..9ebfe54c76d 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -380,9 +380,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte struct pipe_resource *texture, const struct pipe_sampler_view *state) { - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); - struct r600_pipe_state *rstate; + struct r600_pipe_resource_state *rstate; const struct util_format_description *desc; struct r600_resource_texture *tmp; struct r600_resource *rbuffer; @@ -438,35 +437,27 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte array_mode = tmp->array_mode[0]; tile_type = tmp->tile_type; - r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, - S_030000_DIM(r600_tex_dim(texture->target)) | - S_030000_PITCH((pitch / 8) - 1) | - S_030000_NON_DISP_TILING_ORDER(tile_type) | - S_030000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, - S_030004_TEX_HEIGHT(texture->height0 - 1) | - S_030004_TEX_DEPTH(texture->depth0 - 1) | - S_030004_ARRAY_MODE(array_mode), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, - (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); - r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, - (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); - r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, - word4 | - S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | - 
S_030010_ENDIAN_SWAP(endian) | - S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, - S_030014_LAST_LEVEL(state->u.tex.last_level) | - S_030014_BASE_ARRAY(0) | - S_030014_LAST_ARRAY(0), 0xffffffff, NULL); - r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, - S_030018_MAX_ANISO(4 /* max 16 samples */), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, - S_03001C_DATA_FORMAT(format) | - S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL); + rstate->bo[0] = bo[0]; + rstate->bo[1] = bo[1]; + rstate->val[0] = (S_030000_DIM(r600_tex_dim(texture->target)) | + S_030000_PITCH((pitch / 8) - 1) | + S_030000_NON_DISP_TILING_ORDER(tile_type) | + S_030000_TEX_WIDTH(texture->width0 - 1)); + rstate->val[1] = (S_030004_TEX_HEIGHT(texture->height0 - 1) | + S_030004_TEX_DEPTH(texture->depth0 - 1) | + S_030004_ARRAY_MODE(array_mode)); + rstate->val[2] = (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8; + rstate->val[3] = (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8; + rstate->val[4] = (word4 | + S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | + S_030010_ENDIAN_SWAP(endian) | + S_030010_BASE_LEVEL(state->u.tex.first_level)); + rstate->val[5] = (S_030014_LAST_LEVEL(state->u.tex.last_level) | + S_030014_BASE_ARRAY(0) | + S_030014_LAST_ARRAY(0)); + rstate->val[6] = (S_030018_MAX_ANISO(4 /* max 16 samples */)); + rstate->val[7] = (S_03001C_DATA_FORMAT(format) | + S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE)); return &resource->base; } @@ -1032,7 +1023,10 @@ static void cayman_init_config(struct r600_pipe_context *rctx) tmp |= S_008C00_EXPORT_SRC_C(1); r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, (4 << 28), 0xFFFFFFFF, NULL); + /* always set the temp clauses */ + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 
S_008C04_NUM_CLAUSE_TEMP_GPRS(4), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL); @@ -1384,21 +1378,38 @@ void evergreen_init_config(struct r600_pipe_context *rctx) tmp |= S_008C00_ES_PRIO(es_prio); r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); - tmp = 0; - tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); - - tmp = 0; - tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); - r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); - - tmp = 0; - tmp |= S_008C0C_NUM_HS_GPRS(num_hs_gprs); - tmp |= S_008C0C_NUM_LS_GPRS(num_ls_gprs); - r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + /* enable dynamic GPR resource management */ + if (r600_get_minor_version(rctx->radeon) >= 7) { + /* always set temp clauses */ + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, + S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, + S_028838_PS_GPRS(0x1e) | + S_028838_VS_GPRS(0x1e) | + S_028838_GS_GPRS(0x1e) | + S_028838_ES_GPRS(0x1e) | + 
S_028838_HS_GPRS(0x1e) | + S_028838_LS_GPRS(0x1e), 0xFFFFFFFF, NULL); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/ + } else { + tmp = 0; + tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); + tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); + r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C0C_NUM_HS_GPRS(num_hs_gprs); + tmp |= S_008C0C_NUM_LS_GPRS(num_ls_gprs); + r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + } tmp = 0; tmp |= S_008C18_NUM_PS_THREADS(num_ps_threads); @@ -1769,45 +1780,32 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) } void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, - struct r600_resource *rbuffer, - unsigned offset, unsigned stride) + struct r600_pipe_resource_state *rstate) { rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, - offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, - rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, - S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | - S_030008_STRIDE(stride), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, - S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | - S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | - S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | - S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, 
R_030014_RESOURCE0_WORD5, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, - 0xC0000000, 0xFFFFFFFF, NULL); + + rstate->val[0] = 0; + rstate->bo[0] = NULL; + rstate->val[1] = 0; + rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)); + rstate->val[3] = S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | + S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | + S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | + S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W); + rstate->val[4] = 0; + rstate->val[5] = 0; + rstate->val[6] = 0; + rstate->val[7] = 0xc0000000; } -void evergreen_pipe_mod_buffer_resource(struct r600_pipe_state *rstate, +void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride) { - rstate->nregs = 0; - r600_pipe_state_mod_reg_bo(rstate, offset, rbuffer->bo); - r600_pipe_state_mod_reg(rstate, rbuffer->bo_size - offset - 1); - r600_pipe_state_mod_reg(rstate, S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | - S_030008_STRIDE(stride)); - rstate->nregs = 8; - + rstate->bo[0] = rbuffer->bo; + rstate->val[0] = offset; + rstate->val[1] = rbuffer->bo_size - offset - 1; + rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | + S_030008_STRIDE(stride); } diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index ee0c7c9ed9b..d795f5757ed 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -171,6 +171,10 @@ #define S_008C0C_NUM_LS_GPRS(x) (((x) & 0xFF) << 16) #define G_008C0C_NUM_LS_GPRS(x) (((x) >> 16) & 0xFF) #define C_008C0C_NUM_LS_GPRS(x) 0xFF00FFFF + +#define R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 0x00008C10 +#define R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 0x00008C14 + #define R_008C18_SQ_THREAD_RESOURCE_MGMT_1 0x00008C18 #define S_008C18_NUM_PS_THREADS(x) (((x) & 0xFF) << 0) #define 
G_008C18_NUM_PS_THREADS(x) (((x) >> 0) & 0xFF) @@ -1637,6 +1641,12 @@ #define R_028818_PA_CL_VTE_CNTL 0x00028818 #define R_028820_PA_CL_NANINF_CNTL 0x00028820 #define R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1 0x00028838 +#define S_028838_PS_GPRS(x) (((x) & 0x1F) << 0) +#define S_028838_VS_GPRS(x) (((x) & 0x1F) << 5) +#define S_028838_GS_GPRS(x) (((x) & 0x1F) << 10) +#define S_028838_ES_GPRS(x) (((x) & 0x1F) << 15) +#define S_028838_HS_GPRS(x) (((x) & 0x1F) << 20) +#define S_028838_LS_GPRS(x) (((x) & 0x1F) << 25) #define R_028840_SQ_PGM_START_PS 0x00028840 #define R_02884C_SQ_PGM_EXPORTS_PS 0x0002884C #define S_02884C_EXPORT_COLORS(x) (((x) & 0xF) << 1) @@ -1948,6 +1958,4 @@ #define CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0 0x28c38 #define CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1 0x28c3c -#define CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 0x00008C10 -#define CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 0x00008C14 #endif diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 23e7181a86e..bf7138d9e4e 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -30,6 +30,7 @@ #include <stdint.h> #include <stdio.h> #include <util/u_double_list.h> +#include <util/u_inlines.h> #include <pipe/p_compiler.h> #define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) @@ -47,33 +48,6 @@ struct winsys_handle; enum radeon_family { CHIP_UNKNOWN, - CHIP_R100, - CHIP_RV100, - CHIP_RS100, - CHIP_RV200, - CHIP_RS200, - CHIP_R200, - CHIP_RV250, - CHIP_RS300, - CHIP_RV280, - CHIP_R300, - CHIP_R350, - CHIP_RV350, - CHIP_RV380, - CHIP_R420, - CHIP_R423, - CHIP_RV410, - CHIP_RS400, - CHIP_RS480, - CHIP_RS600, - CHIP_RS690, - CHIP_RS740, - CHIP_RV515, - CHIP_R520, - CHIP_RV530, - CHIP_RV560, - CHIP_RV570, - CHIP_R580, CHIP_R600, CHIP_RV610, CHIP_RV630, @@ -130,14 +104,24 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, unsigned handle, unsigned *array_mode); void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); void 
r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); -void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, - struct r600_bo *src); boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo, unsigned stride, struct winsys_handle *whandle); static INLINE unsigned r600_bo_offset(struct r600_bo *bo) { return 0; } +void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo); + +/* this relies on the pipe_reference being the first member of r600_bo */ +static INLINE void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, struct r600_bo *src) +{ + struct r600_bo *old = *dst; + + if (pipe_reference((struct pipe_reference *)(*dst), (struct pipe_reference *)src)) { + r600_bo_destroy(radeon, old); + } + *dst = src; +} /* R600/R700 STATES */ @@ -170,8 +154,17 @@ struct r600_pipe_state { struct r600_pipe_reg regs[R600_BLOCK_MAX_REG]; }; +struct r600_pipe_resource_state { + unsigned id; + u32 val[8]; + struct r600_bo *bo[2]; +}; + #define R600_BLOCK_STATUS_ENABLED (1 << 0) #define R600_BLOCK_STATUS_DIRTY (1 << 1) +#define R600_BLOCK_STATUS_RESOURCE_DIRTY (1 << 2) + +#define R600_BLOCK_STATUS_RESOURCE_VERTEX (1 << 3) struct r600_block_reloc { struct r600_bo *bo; @@ -182,6 +175,7 @@ struct r600_block_reloc { struct r600_block { struct list_head list; + struct list_head enable_list; unsigned status; unsigned flags; unsigned start_offset; @@ -245,6 +239,8 @@ struct r600_context { unsigned nblocks; struct r600_block **blocks; struct list_head dirty; + struct list_head resource_dirty; + struct list_head enable_list; unsigned pm4_ndwords; unsigned pm4_cdwords; unsigned pm4_dirty_cdwords; @@ -261,6 +257,10 @@ struct r600_context { unsigned num_dest_buffers; unsigned flags; boolean predicate_drawing; + struct r600_range ps_resources; + struct r600_range vs_resources; + struct r600_range fs_resources; + int num_ps_resources, num_vs_resources, num_fs_resources; }; struct r600_draw { @@ -275,9 +275,9 @@ struct r600_draw { int 
r600_context_init(struct r600_context *ctx, struct radeon *radeon); void r600_context_fini(struct r600_context *ctx); void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state); -void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); -void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); -void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); +void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); +void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_flush(struct r600_context *ctx); @@ -303,9 +303,9 @@ void r600_context_flush_dest_caches(struct r600_context *ctx); int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon); void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); void evergreen_context_flush_dest_caches(struct r600_context *ctx); -void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); -void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); -void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct 
r600_pipe_resource_state *state, unsigned rid); +void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); +void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 65e539eba35..3196d97dbbb 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1088,7 +1088,7 @@ static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *al /* Collect required cache lines. */ for (i = 0; i < 3; ++i) { - bool found = false; + boolean found = false; unsigned int line; if (alu->src[i].sel < 512) @@ -1140,7 +1140,7 @@ static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *al /* Setup the kcache lines. 
*/ for (i = 0; i < count; ++i) { - bool found = false; + boolean found = false; for (j = 0; j < 2; ++j) { if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 && diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 8002d943abd..d92b74ebc4e 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -82,12 +82,12 @@ struct r600_screen { struct r600_pipe_sampler_view { struct pipe_sampler_view base; - struct r600_pipe_state state; + struct r600_pipe_resource_state state; }; struct r600_pipe_rasterizer { struct r600_pipe_state rstate; - bool flatshade; + boolean flatshade; unsigned sprite_coord_enable; float offset_units; float offset_scale; @@ -173,7 +173,7 @@ struct r600_pipe_context { struct r600_pipe_state *states[R600_PIPE_NSTATES]; struct r600_context ctx; struct r600_vertex_element *vertex_elements; - struct r600_pipe_state fs_resource[PIPE_MAX_ATTRIBS]; + struct r600_pipe_resource_state fs_resource[PIPE_MAX_ATTRIBS]; struct pipe_framebuffer_state framebuffer; struct pipe_index_buffer index_buffer; unsigned cb_target_mask; @@ -185,25 +185,25 @@ struct r600_pipe_context { struct r600_pipe_shader *ps_shader; struct r600_pipe_shader *vs_shader; struct r600_pipe_state vs_const_buffer; - struct r600_pipe_state vs_const_buffer_resource[R600_MAX_CONST_BUFFERS]; + struct r600_pipe_resource_state vs_const_buffer_resource[R600_MAX_CONST_BUFFERS]; struct r600_pipe_state ps_const_buffer; - struct r600_pipe_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS]; + struct r600_pipe_resource_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS]; struct r600_pipe_rasterizer *rasterizer; struct r600_pipe_state vgt; struct r600_pipe_state spi; /* shader information */ unsigned sprite_coord_enable; - bool flatshade; - bool export_16bpc; + boolean flatshade; + boolean export_16bpc; unsigned alpha_ref; - bool alpha_ref_dirty; + boolean alpha_ref_dirty; struct r600_textures_info ps_samplers; struct 
r600_pipe_fences fences; struct u_vbuf_mgr *vbuf_mgr; struct util_slab_mempool pool_transfers; - bool blit; + boolean blit; }; @@ -224,10 +224,8 @@ void evergreen_fetch_shader(struct pipe_context *ctx, struct r600_vertex_element void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); void evergreen_polygon_offset_update(struct r600_pipe_context *rctx); void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, - struct r600_resource *rbuffer, - unsigned offset, unsigned stride); -void evergreen_pipe_mod_buffer_resource(struct r600_pipe_state *rstate, + struct r600_pipe_resource_state *rstate); +void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride); @@ -268,10 +266,8 @@ void r600_fetch_shader(struct pipe_context *ctx, struct r600_vertex_element *ve) void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); void r600_polygon_offset_update(struct r600_pipe_context *rctx); void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, - struct r600_resource *rbuffer, - unsigned offset, unsigned stride); -void r600_pipe_mod_buffer_resource(struct r600_pipe_state *rstate, + struct r600_pipe_resource_state *rstate); +void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 46fdbfed34a..a6cfa704ca5 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -414,7 +414,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); - struct r600_pipe_state *rstate; + struct 
r600_pipe_resource_state *rstate; const struct util_format_description *desc; struct r600_resource_texture *tmp; struct r600_resource *rbuffer; @@ -477,33 +477,29 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c depth = texture->array_size; } - r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, - S_038000_DIM(r600_tex_dim(texture->target)) | - S_038000_TILE_MODE(array_mode) | - S_038000_TILE_TYPE(tile_type) | - S_038000_PITCH((pitch / 8) - 1) | - S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, - S_038004_TEX_HEIGHT(height - 1) | - S_038004_TEX_DEPTH(depth - 1) | - S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, - (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); - r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, - (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); - r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, - word4 | - S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | - S_038010_REQUEST_SIZE(1) | - S_038010_ENDIAN_SWAP(endian) | - S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, - S_038014_LAST_LEVEL(state->u.tex.last_level) | - S_038014_BASE_ARRAY(state->u.tex.first_layer) | - S_038014_LAST_ARRAY(state->u.tex.last_layer), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, - S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE) | - S_038018_MAX_ANISO(4 /* max 16 samples */), 0xFFFFFFFF, NULL); + rstate->bo[0] = bo[0]; + rstate->bo[1] = bo[1]; + + rstate->val[0] = (S_038000_DIM(r600_tex_dim(texture->target)) | + S_038000_TILE_MODE(array_mode) | + S_038000_TILE_TYPE(tile_type) | + S_038000_PITCH((pitch / 8) - 1) | + S_038000_TEX_WIDTH(texture->width0 - 1)); + rstate->val[1] = (S_038004_TEX_HEIGHT(height - 1) | + 
S_038004_TEX_DEPTH(depth - 1) | + S_038004_DATA_FORMAT(format)); + rstate->val[2] = (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8; + rstate->val[3] = (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8; + rstate->val[4] = (word4 | + S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | + S_038010_REQUEST_SIZE(1) | + S_038010_ENDIAN_SWAP(endian) | + S_038010_BASE_LEVEL(state->u.tex.first_level)); + rstate->val[5] = (S_038014_LAST_LEVEL(state->u.tex.last_level) | + S_038014_BASE_ARRAY(state->u.tex.first_layer) | + S_038014_LAST_ARRAY(state->u.tex.last_layer)); + rstate->val[6] = (S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE) | + S_038018_MAX_ANISO(4 /* max 16 samples */)); return &resource->base; } @@ -1486,37 +1482,27 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) } void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, - struct r600_resource *rbuffer, - unsigned offset, unsigned stride) + struct r600_pipe_resource_state *rstate) { rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, - offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, - rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, - S_038008_ENDIAN_SWAP(r600_endian_swap(32)) | - S_038008_STRIDE(stride), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, - 0xC0000000, 0xFFFFFFFF, NULL); + + rstate->bo[0] = NULL; + rstate->val[0] = 0; + rstate->val[1] = 0; + rstate->val[2] = 0; + rstate->val[3] = 0; + rstate->val[4] = 0; + rstate->val[5] = 0; + rstate->val[6] = 0xc0000000; 
} -void r600_pipe_mod_buffer_resource(struct r600_pipe_state *rstate, +void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride) { - rstate->nregs = 0; - r600_pipe_state_mod_reg_bo(rstate, offset, rbuffer->bo); - r600_pipe_state_mod_reg(rstate, rbuffer->bo_size - offset - 1); - r600_pipe_state_mod_reg(rstate, S_038008_ENDIAN_SWAP(r600_endian_swap(32)) | - S_038008_STRIDE(stride)); - rstate->nregs = 7; + rstate->val[0] = offset; + rstate->bo[0] = rbuffer->bo; + rstate->val[1] = rbuffer->bo_size - offset - 1; + rstate->val[2] = S_038008_ENDIAN_SWAP(r600_endian_swap(32)) | + S_038008_STRIDE(stride); } diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 48ab15f9323..a670ac02be2 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -318,12 +318,10 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state) static void r600_update_alpha_ref(struct r600_pipe_context *rctx) { - unsigned alpha_ref = rctx->alpha_ref; + unsigned alpha_ref; struct r600_pipe_state rstate; - if (!rctx->alpha_ref_dirty) - return; - + alpha_ref = rctx->alpha_ref; rstate.nregs = 0; if (rctx->export_16bpc) alpha_ref &= ~0x1FFF; @@ -388,7 +386,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource_buffer *rbuffer = r600_buffer(buffer); - struct r600_pipe_state *rstate; + struct r600_pipe_resource_state *rstate; uint32_t offset; /* Note that the state tracker can unbind constant buffers by @@ -416,9 +414,9 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, rstate = &rctx->vs_const_buffer_resource[index]; if (!rstate->id) { if (rctx->family >= CHIP_CEDAR) { - evergreen_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + 
evergreen_pipe_init_buffer_resource(rctx, rstate); } else { - r600_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_pipe_init_buffer_resource(rctx, rstate); } } @@ -444,9 +442,9 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, rstate = &rctx->ps_const_buffer_resource[index]; if (!rstate->id) { if (rctx->family >= CHIP_CEDAR) { - evergreen_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + evergreen_pipe_init_buffer_resource(rctx, rstate); } else { - r600_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_pipe_init_buffer_resource(rctx, rstate); } } if (rctx->family >= CHIP_CEDAR) { @@ -468,7 +466,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) { - struct r600_pipe_state *rstate; + struct r600_pipe_resource_state *rstate; struct r600_resource *rbuffer; struct pipe_vertex_buffer *vertex_buffer; unsigned i, count, offset; @@ -503,9 +501,9 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) if (!rstate->id) { if (rctx->family >= CHIP_CEDAR) { - evergreen_pipe_init_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + evergreen_pipe_init_buffer_resource(rctx, rstate); } else { - r600_pipe_init_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + r600_pipe_init_buffer_resource(rctx, rstate); } } @@ -595,7 +593,8 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) return; } - r600_update_alpha_ref(rctx); + if (rctx->alpha_ref_dirty) + r600_update_alpha_ref(rctx); mask = 0; for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index bb72cf63bc5..ac2e65b988e 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -83,7 +83,7 @@ 
softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_SM3: return 1; case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; + return 1; case PIPE_CAP_POINT_SPRITE: return 1; case PIPE_CAP_MAX_RENDER_TARGETS: @@ -163,7 +163,7 @@ softpipe_get_paramf(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_POINT_WIDTH_AA: return 255.0; /* arbitrary */ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; /* not actually signficant at this time */ + return 16.0; case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: return 16.0; /* arbitrary */ default: diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 1446aee2aa4..90766f4119c 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1709,6 +1709,317 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler, } +/* For anisotropic filtering */ +#define WEIGHT_LUT_SIZE 1024 + +static float *weightLut = NULL; + +/** + * Creates the look-up table used to speed-up EWA sampling + */ +static void +create_filter_table(void) +{ + unsigned i; + if (!weightLut) { + weightLut = (float *) malloc(WEIGHT_LUT_SIZE * sizeof(float)); + + for (i = 0; i < WEIGHT_LUT_SIZE; ++i) { + float alpha = 2; + float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1); + float weight = (float) exp(-alpha * r2); + weightLut[i] = weight; + } + } +} + + +/** + * Elliptical weighted average (EWA) filter for producing high quality + * anisotropic filtered results. + * Based on the Higher Quality Elliptical Weighted Average Filter + * published by Paul S. 
Heckbert in his Master's Thesis + * "Fundamentals of Texture Mapping and Image Warping" (1989) + */ +static void +img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + const float dudx, const float dvdx, + const float dudy, const float dvdy, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + + unsigned level0 = samp->level > 0 ? samp->level : 0; + float scaling = 1.0 / (1 << level0); + int width = u_minify(texture->width0, level0); + int height = u_minify(texture->height0, level0); + + float ux = dudx * scaling; + float vx = dvdx * scaling; + float uy = dudy * scaling; + float vy = dvdy * scaling; + + /* compute ellipse coefficients to bound the region: + * A*x*x + B*x*y + C*y*y = F. + */ + float A = vx*vx+vy*vy+1; + float B = -2*(ux*vx+uy*vy); + float C = ux*ux+uy*uy+1; + float F = A*C-B*B/4.0; + + /* check if it is an ellipse */ + /* ASSERT(F > 0.0); */ + + /* Compute the ellipse's (u,v) bounding box in texture space */ + float d = -B*B+4.0*C*A; + float box_u = 2.0 / d * sqrt(d*C*F); /* box_u -> half of bbox width */ + float box_v = 2.0 / d * sqrt(A*d*F); /* box_v -> half of bbox height */ + + float rgba_temp[NUM_CHANNELS][QUAD_SIZE]; + float s_buffer[QUAD_SIZE]; + float t_buffer[QUAD_SIZE]; + float weight_buffer[QUAD_SIZE]; + unsigned buffer_next; + int j; + float den;// = 0.0F; + float ddq; + float U;// = u0 - tex_u; + int v; + + /* Scale ellipse formula to directly index the Filter Lookup Table. + * i.e. 
scale so that F = WEIGHT_LUT_SIZE-1 + */ + double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F; + A *= formScale; + B *= formScale; + C *= formScale; + /* F *= formScale; */ /* no need to scale F as we don't use it below here */ + + /* For each quad, the du and dx values are the same and so the ellipse is + * also the same. Note that texel/image access can only be performed using + * a quad, i.e. it is not possible to get the pixel value for a single + * tex coord. In order to have a better performance, the access is buffered + * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is full, + * then the pixel values are read from the image. + */ + ddq = 2 * A; + + for (j = 0; j < QUAD_SIZE; j++) { + /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse + * and incrementally update the value of Ax^2+Bxy+Cy^2; when this + * value, q, is less than F, we're inside the ellipse + */ + float tex_u=-0.5 + s[j] * texture->width0 * scaling; + float tex_v=-0.5 + t[j] * texture->height0 * scaling; + + int u0 = floor(tex_u - box_u); + int u1 = ceil (tex_u + box_u); + int v0 = floor(tex_v - box_v); + int v1 = ceil (tex_v + box_v); + + float num[4] = {0.0F, 0.0F, 0.0F, 0.0F}; + buffer_next = 0; + den = 0; + U = u0 - tex_u; + for (v = v0; v <= v1; ++v) { + float V = v - tex_v; + float dq = A * (2 * U + 1) + B * V; + float q = (C * V + B * U) * V + A * U * U; + + int u; + for (u = u0; u <= u1; ++u) { + /* Note that the ellipse has been pre-scaled so F = WEIGHT_LUT_SIZE - 1 */ + if (q < WEIGHT_LUT_SIZE) { + /* as a LUT is used, q must never be negative; + * should not happen, though + */ + const int qClamped = q >= 0.0F ? 
q : 0; + float weight = weightLut[qClamped]; + + weight_buffer[buffer_next] = weight; + s_buffer[buffer_next] = u / ((float) width); + t_buffer[buffer_next] = v / ((float) height); + + buffer_next++; + if (buffer_next == QUAD_SIZE) { + /* 4 texel coords are in the buffer -> read it now */ + int jj; + /* it is assumed that samp->min_img_filter is set to + * img_filter_2d_nearest or one of the + * accelerated img_filter_2d_nearest_XXX functions. + */ + samp->min_img_filter(tgsi_sampler, s_buffer, t_buffer, p, NULL, + tgsi_sampler_lod_bias, rgba_temp); + for (jj = 0; jj < buffer_next; jj++) { + num[0] += weight_buffer[jj] * rgba_temp[0][jj]; + num[1] += weight_buffer[jj] * rgba_temp[1][jj]; + num[2] += weight_buffer[jj] * rgba_temp[2][jj]; + num[3] += weight_buffer[jj] * rgba_temp[3][jj]; + } + + buffer_next = 0; + } + + den += weight; + } + q += dq; + dq += ddq; + } + } + + /* if the tex coord buffer contains unread values, we will read them now. + * Note that in most cases we have to read more pixel values than required, + * however, as the img_filter_2d_nearest function(s) does not have a count + * parameter, we need to read the whole quad and ignore the unused values + */ + if (buffer_next > 0) { + int jj; + /* it is assumed that samp->min_img_filter is set to + * img_filter_2d_nearest or one of the + * accelerated img_filter_2d_nearest_XXX functions. + */ + samp->min_img_filter(tgsi_sampler, s_buffer, t_buffer, p, NULL, + tgsi_sampler_lod_bias, rgba_temp); + for (jj = 0; jj < buffer_next; jj++) { + num[0] += weight_buffer[jj] * rgba_temp[0][jj]; + num[1] += weight_buffer[jj] * rgba_temp[1][jj]; + num[2] += weight_buffer[jj] * rgba_temp[2][jj]; + num[3] += weight_buffer[jj] * rgba_temp[3][jj]; + } + } + + if (den <= 0.0F) { + /* Reaching this place would mean + * that no pixels intersected the ellipse. + * This should never happen because + * the filter we use always + * intersects at least one pixel. 
+ */ + + /*rgba[0]=0; + rgba[1]=0; + rgba[2]=0; + rgba[3]=0;*/ + /* not enough pixels in resampling, resort to direct interpolation */ + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba_temp); + den = 1; + num[0] = rgba_temp[0][j]; + num[1] = rgba_temp[1][j]; + num[2] = rgba_temp[2][j]; + num[3] = rgba_temp[3][j]; + } + + rgba[0][j] = num[0] / den; + rgba[1][j] = num[1] / den; + rgba[2][j] = num[2] / den; + rgba[3][j] = num[3] / den; + } +} + + +/** + * Sample 2D texture using an anisotropic filter. + */ +static void +mip_filter_linear_aniso(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + int level0; + float lambda; + float lod[QUAD_SIZE]; + + float s_to_u = u_minify(texture->width0, samp->view->u.tex.first_level); + float t_to_v = u_minify(texture->height0, samp->view->u.tex.first_level); + float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u; + float dudy = (s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]) * s_to_u; + float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v; + float dvdy = (t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]) * t_to_v; + + if (control == tgsi_sampler_lod_bias) { + /* note: instead of working with Px and Py, we will use the + * squared length instead, to avoid sqrt. 
+ */ + float Px2 = dudx * dudx + dvdx * dvdx; + float Py2 = dudy * dudy + dvdy * dvdy; + + float Pmax2; + float Pmin2; + float e; + const float maxEccentricity = samp->sampler->max_anisotropy * samp->sampler->max_anisotropy; + + if (Px2 < Py2) { + Pmax2 = Py2; + Pmin2 = Px2; + } + else { + Pmax2 = Px2; + Pmin2 = Py2; + } + + /* if the eccentricity of the ellipse is too big, scale up the shorter + * of the two vectors to limit the maximum amount of work per pixel + */ + e = Pmax2 / Pmin2; + if (e > maxEccentricity) { + /* float s=e / maxEccentricity; + minor[0] *= s; + minor[1] *= s; + Pmin2 *= s; */ + Pmin2 = Pmax2 / maxEccentricity; + } + + /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid + * this since 0.5*log(x) = log(sqrt(x)) + */ + lambda = 0.5 * util_fast_log2(Pmin2) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } + else { + assert(control == tgsi_sampler_lod_explicit); + + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. + */ + lambda = lod[0]; + level0 = samp->view->u.tex.first_level + (int)lambda; + + /* If the ellipse covers the whole image, we can + * simply return the average of the whole image. + */ + if (level0 >= texture->last_level) { + samp->level = texture->last_level; + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); + } + else { + /* don't bother interpolating between multiple LODs; it doesn't + * seem to be worth the extra running time. 
+ */ + samp->level = level0; + img_filter_2d_ewa(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, + dudx, dvdx, dudy, dvdy, rgba); + } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } +} + + /** * Specialized version of mip_filter_linear with hard-wired calls to @@ -2316,14 +2627,33 @@ sp_create_sampler_variant( const struct pipe_sampler_state *sampler, sampler->normalized_coords && sampler->wrap_s == PIPE_TEX_WRAP_REPEAT && sampler->wrap_t == PIPE_TEX_WRAP_REPEAT && - sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR) - { + sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR) { samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT; } - else - { + else { samp->mip_filter = mip_filter_linear; } + + /* Anisotropic filtering extension. */ + if (sampler->max_anisotropy > 1) { + samp->mip_filter = mip_filter_linear_aniso; + + /* Override min_img_filter: + * min_img_filter needs to be set to NEAREST since we need to access + * each texture pixel as it is and weight it later; using linear + * filters will have incorrect results. + * By setting the filter to NEAREST here, we can avoid calling the + * generic img_filter_2d_nearest in the anisotropic filter function, + * making it possible to use one of the accelerated implementations + */ + samp->min_img_filter = get_img_filter(key, PIPE_TEX_FILTER_NEAREST, sampler); + + /* on first access create the lookup table containing the filter weights. */ + if (!weightLut) { + create_filter_table(); + } + } + break; } diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c index 1ed1d5d25bb..ebcd4bcaf10 100644 --- a/src/gallium/drivers/svga/svga_cmd.c +++ b/src/gallium/drivers/svga/svga_cmd.c @@ -46,10 +46,10 @@ * to have allocated the fifo space before converting. * * Results: - * id is filld out. + * id is filled out. * * Side effects: - * One surface relocation is preformed for texture handle. + * One surface relocation is performed for texture handle. 
* *---------------------------------------------------------------------- */ @@ -224,7 +224,7 @@ SVGA3D_DestroyContext(struct svga_winsys_context *swc) // IN * containers for host VRAM objects like textures, vertex * buffers, and depth/stencil buffers. * - * Surfaces are hierarchial: + * Surfaces are hierarchical: * * - Surface may have multiple faces (for cube maps) * @@ -376,11 +376,9 @@ SVGA3D_DestroySurface(struct svga_winsys_context *swc, /* *---------------------------------------------------------------------- * - * SVGA3D_BeginSurfaceDMA-- + * SVGA3D_SurfaceDMA-- * - * Begin a SURFACE_DMA command. This reserves space for it in - * the FIFO, and returns a pointer to the command's box array. - * This function must be paired with SVGA_FIFOCommitAll(). + * Emit a SURFACE_DMA command. * * When the SVGA3D device asynchronously processes this FIFO * command, a DMA operation is performed between host VRAM and |