diff options
author | Christian König <[email protected]> | 2011-07-04 15:04:41 +0200 |
---|---|---|
committer | Christian König <[email protected]> | 2011-07-04 15:04:41 +0200 |
commit | c3b2230b71cb3a00a7f4c0987197d397bada650b (patch) | |
tree | 018f5df0f8b5976ddb56ef4f13e9466587838998 /src/gallium/drivers | |
parent | 003401f95c9b59471c22368b7da16fe7a951e490 (diff) | |
parent | 424b1210d951c206e7c2fb8f2778acbd384eb247 (diff) |
Merge remote-tracking branch 'origin/master' into pipe-video
Conflicts:
configure.ac
src/gallium/drivers/r600/r600_state_inlines.h
src/gallium/tests/trivial/Makefile
src/gallium/winsys/g3dvl/dri/XF86dri.c
src/gallium/winsys/g3dvl/dri/driclient.c
src/gallium/winsys/g3dvl/dri/driclient.h
src/gallium/winsys/g3dvl/dri/xf86dri.h
src/gallium/winsys/g3dvl/dri/xf86dristr.h
src/gallium/winsys/r600/drm/r600_bo.c
Diffstat (limited to 'src/gallium/drivers')
71 files changed, 1833 insertions, 704 deletions
diff --git a/src/gallium/drivers/i915/Makefile b/src/gallium/drivers/i915/Makefile index b3f387f9335..778124728bb 100644 --- a/src/gallium/drivers/i915/Makefile +++ b/src/gallium/drivers/i915/Makefile @@ -21,6 +21,7 @@ C_SOURCES = \ i915_screen.c \ i915_prim_emit.c \ i915_prim_vbuf.c \ + i915_query.c \ i915_resource.c \ i915_resource_texture.c \ i915_resource_buffer.c \ diff --git a/src/gallium/drivers/i915/SConscript b/src/gallium/drivers/i915/SConscript index 8f5deed64a9..98370601b7f 100644 --- a/src/gallium/drivers/i915/SConscript +++ b/src/gallium/drivers/i915/SConscript @@ -16,6 +16,7 @@ i915 = env.ConvenienceLibrary( 'i915_fpc_translate.c', 'i915_prim_emit.c', 'i915_prim_vbuf.c', + 'i915_query.c', 'i915_screen.c', 'i915_state.c', 'i915_state_derived.c', diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO index fba180064c3..c26db198d20 100644 --- a/src/gallium/drivers/i915/TODO +++ b/src/gallium/drivers/i915/TODO @@ -26,5 +26,20 @@ Random list of problems with i915g: - src/xvmc/i915_structs.h in xf86-video-intel has a few more bits of various commands defined. Scavenge them and see what's useful. +- Do smarter remapping. Right now we send everything onto tex coords 0-7. + We could also use diffuse/specular and pack two sets of 2D coords in a single + 4D. Is it a big problem though? We're more limited by the # of texture + indirections and the # of instructions. + +- Leverage draw to enable more caps: + * PIPE_CAP_TGSI_INSTANCEID + * PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS + +- Finish front/back face. We need to add face support to lp_build_system_values_array and use it in draw_llvm.c. + +- Replace constants and immediates which are 0,1,-1 or a combination of those with a swizzle. + +- i915_delete_fs_state doesn't call draw_delete_fragment_shader. Why? + Other bugs can be found here: https://bugs.freedesktop.org/buglist.cgi?bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&component=Drivers/Gallium/i915g diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c index 4a97746e981..fcb208d6dae 100644 --- a/src/gallium/drivers/i915/i915_clear.c +++ b/src/gallium/drivers/i915/i915_clear.c @@ -66,7 +66,7 @@ i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba, else clear_color = (u_color.ui & 0xffff) | (u_color.ui << 16); - util_pack_color(rgba, PIPE_FORMAT_B8G8R8A8_UNORM, &u_color); + util_pack_color(rgba, cbuf->format, &u_color); clear_color8888 = u_color.ui; } else clear_color = clear_color8888 = 0; diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 7a98ef73c1f..28ff40a2328 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -29,6 +29,7 @@ #include "i915_state.h" #include "i915_screen.h" #include "i915_surface.h" +#include "i915_query.h" #include "i915_batch.h" #include "i915_resource.h" @@ -53,13 +54,11 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) struct i915_context *i915 = i915_context(pipe); struct draw_context *draw = i915->draw; void *mapped_indices = NULL; - unsigned cbuf_dirty; /* * Ack vs contants here, helps ipers a lot. */ - cbuf_dirty = i915->dirty & I915_NEW_VS_CONSTANTS; i915->dirty &= ~I915_NEW_VS_CONSTANTS; if (i915->dirty) @@ -72,15 +71,13 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) mapped_indices = i915_buffer(i915->index_buffer.buffer)->data; draw_set_mapped_index_buffer(draw, mapped_indices); - if (cbuf_dirty) { - if (i915->constants[PIPE_SHADER_VERTEX]) - draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, - i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data, - (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * - 4 * sizeof(float))); - else - draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0); - } + if (i915->constants[PIPE_SHADER_VERTEX]) + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, + i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data, + (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * + 4 * sizeof(float))); + else + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0); /* * Do the drawing @@ -106,7 +103,7 @@ static void i915_destroy(struct pipe_context *pipe) if (i915->blitter) util_blitter_destroy(i915->blitter); - + if(i915->batch) i915->iws->batchbuffer_destroy(i915->batch); @@ -150,6 +147,8 @@ i915_create_context(struct pipe_screen *screen, void *priv) /* init this before draw */ util_slab_create(&i915->transfer_pool, sizeof(struct pipe_transfer), 16, UTIL_SLAB_SINGLETHREADED); + util_slab_create(&i915->texture_transfer_pool, sizeof(struct i915_transfer), + 16, UTIL_SLAB_SINGLETHREADED); /* Batch stream debugging is a bit hacked up at the moment: */ @@ -170,9 +169,11 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915_init_state_functions(i915); i915_init_flush_functions(i915); i915_init_resource_functions(i915); + i915_init_query_functions(i915); draw_install_aaline_stage(i915->draw, &i915->base); draw_install_aapoint_stage(i915->draw, &i915->base); + draw_enable_point_sprites(i915->draw, TRUE); /* augmented draw pipeline clobbers state functions */ i915_init_fixup_state_functions(i915); diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 964948edc0e..c964208fedd 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -102,6 +102,8 @@ struct i915_fragment_shader struct tgsi_shader_info info; + struct draw_fragment_shader *draw_data; + uint *program; uint program_len; @@ -260,6 +262,7 @@ struct i915_context { int num_validation_buffers; struct util_slab_mempool transfer_pool; + struct util_slab_mempool texture_transfer_pool; /** blitter/hw-clear */ struct blitter_context* blitter; diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h index 2f0f99d0468..509395cf1f5 100644 --- a/src/gallium/drivers/i915/i915_fpc.h +++ b/src/gallium/drivers/i915/i915_fpc.h @@ -37,6 +37,9 @@ #define I915_PROGRAM_SIZE 192 +/* Use those indices for pos/face routing, must be >= I915_TEX_UNITS */ +#define I915_SEMANTIC_POS 10 +#define I915_SEMANTIC_FACE 11 /** @@ -67,13 +70,13 @@ struct i915_fp_compile { uint temp_flag; /**< Tracks temporary regs which are in use */ uint utemp_flag; /**< Tracks TYPE_U temporary regs which are in use */ + uint register_phases[16]; uint nr_tex_indirect; uint nr_tex_insn; uint nr_alu_insn; uint nr_decl_insn; boolean error; /**< Set if i915_program_error() is called */ - uint wpos_tex; uint NumNativeInstructions; uint NumNativeAluInstructions; uint NumNativeTexInstructions; diff --git a/src/gallium/drivers/i915/i915_fpc_emit.c b/src/gallium/drivers/i915/i915_fpc_emit.c index 76c24d2b2fd..d28595e0fd3 100644 --- a/src/gallium/drivers/i915/i915_fpc_emit.c +++ b/src/gallium/drivers/i915/i915_fpc_emit.c @@ -67,7 +67,7 @@ i915_get_temp(struct i915_fp_compile *p) { int bit = ffs(~p->temp_flag); if (!bit) { - i915_program_error(p, "i915_get_temp: out of temporaries\n"); + i915_program_error(p, "i915_get_temp: out of temporaries"); return 0; } @@ -92,7 +92,7 @@ i915_get_utemp(struct i915_fp_compile * p) { int bit = ffs(~p->utemp_flag); if (!bit) { - i915_program_error(p, "i915_get_utemp: out of temporaries\n"); + i915_program_error(p, "i915_get_utemp: out of temporaries"); return 0; } @@ -128,9 +128,13 @@ i915_emit_decl(struct i915_fp_compile *p, else return reg; - *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); - *(p->decl++) = D1_MBZ; - *(p->decl++) = D2_MBZ; + if (p->decl< p->declarations + I915_PROGRAM_SIZE) { + *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); + *(p->decl++) = D1_MBZ; + *(p->decl++) = D2_MBZ; + } + else + i915_program_error(p, "Out of declarations"); p->nr_decl_insn++; return reg; @@ -187,9 +191,16 @@ i915_emit_arith(struct i915_fp_compile * p, p->utemp_flag = old_utemp_flag; /* restore */ } - *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); - *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); - *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); + if (p->csr< p->program + I915_PROGRAM_SIZE) { + *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); + *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); + *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); + } + else + i915_program_error(p, "Out of instructions"); + + if (GET_UREG_TYPE(dest) == REG_TYPE_R) + p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect; p->nr_alu_insn++; return dest; @@ -245,17 +256,31 @@ uint i915_emit_texld( struct i915_fp_compile *p, assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); assert(dest == UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); - /* is the sampler coord a texcoord input reg? */ - if (GET_UREG_TYPE(coord) != REG_TYPE_T) { - p->nr_tex_indirect++; - } + /* Output register being oC or oD defines a phase boundary */ + if (GET_UREG_TYPE(dest) == REG_TYPE_OC || + GET_UREG_TYPE(dest) == REG_TYPE_OD) + p->nr_tex_indirect++; - *(p->csr++) = (opcode | - T0_DEST( dest ) | - T0_SAMPLER( sampler )); + /* Reading from an r# register whose contents depend on output of the + * current phase defines a phase boundary. + */ + if (GET_UREG_TYPE(coord) == REG_TYPE_R && + p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect) + p->nr_tex_indirect++; + + if (p->csr< p->program + I915_PROGRAM_SIZE) { + *(p->csr++) = (opcode | + T0_DEST( dest ) | + T0_SAMPLER( sampler )); + + *(p->csr++) = T1_ADDRESS_REG( coord ); + *(p->csr++) = T2_MBZ; + } + else + i915_program_error(p, "Out of instructions"); - *(p->csr++) = T1_ADDRESS_REG( coord ); - *(p->csr++) = T2_MBZ; + if (GET_UREG_TYPE(dest) == REG_TYPE_R) + p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect; p->nr_tex_insn++; } @@ -293,7 +318,7 @@ i915_emit_const1f(struct i915_fp_compile * p, float c0) } } - i915_program_error(p, "i915_emit_const1f: out of constants\n"); + i915_program_error(p, "i915_emit_const1f: out of constants"); return 0; } @@ -313,6 +338,8 @@ i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) if (c1 == 1.0) return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); + // XXX emit swizzle here for 0, 1, -1 and any combination thereof + // we can use swizzle + neg for that for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { if (ifs->constant_flags[reg] == 0xf || ifs->constant_flags[reg] == I915_CONSTFLAG_USER) @@ -329,12 +356,10 @@ i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) } } - i915_program_error(p, "i915_emit_const2f: out of constants\n"); + i915_program_error(p, "i915_emit_const2f: out of constants"); return 0; } - - uint i915_emit_const4f(struct i915_fp_compile * p, float c0, float c1, float c2, float c3) @@ -342,6 +367,9 @@ i915_emit_const4f(struct i915_fp_compile * p, struct i915_fragment_shader *ifs = p->shader; unsigned reg; + // XXX emit swizzle here for 0, 1, -1 and any combination thereof + // we can use swizzle + neg for that + printf("const %f %f %f %f\n",c0,c1,c2,c3); for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { if (ifs->constant_flags[reg] == 0xf && ifs->constants[reg][0] == c0 && @@ -363,7 +391,7 @@ i915_emit_const4f(struct i915_fp_compile * p, } } - i915_program_error(p, "i915_emit_const4f: out of constants\n"); + i915_program_error(p, "i915_emit_const4f: out of constants"); return 0; } diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 27f100843bf..0cbd4f2d748 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -41,6 +41,9 @@ #include "draw/draw_vertex.h" +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif /** * Simple pass-through fragment shader to use when we don't have @@ -72,19 +75,33 @@ static unsigned passthrough[] = /* 1, -1/3!, 1/5!, -1/7! */ -static const float sin_constants[4] = { 1.0, +static const float scs_sin_constants[4] = { 1.0, -1.0f / (3 * 2 * 1), 1.0f / (5 * 4 * 3 * 2 * 1), -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1) }; /* 1, -1/2!, 1/4!, -1/6! */ -static const float cos_constants[4] = { 1.0, +static const float scs_cos_constants[4] = { 1.0, -1.0f / (2 * 1), 1.0f / (4 * 3 * 2 * 1), -1.0f / (6 * 5 * 4 * 3 * 2 * 1) }; +/* 2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7! */ +static const float sin_constants[4] = { 2.0 * M_PI, + -8.0f * M_PI * M_PI * M_PI / (3 * 2 * 1), + 32.0f * M_PI * M_PI * M_PI * M_PI * M_PI / (5 * 4 * 3 * 2 * 1), + -128.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (7 * 6 * 5 * 4 * 3 * 2 * 1) +}; + +/* 1, -(2*pi)^2/2!, (2*pi)^4/4!, -(2*pi)^6/6! */ +static const float cos_constants[4] = { 1.0, + -4.0f * M_PI * M_PI / (2 * 1), + 16.0f * M_PI * M_PI * M_PI * M_PI / (4 * 3 * 2 * 1), + -64.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (6 * 5 * 4 * 3 * 2 * 1) +}; + /** @@ -185,12 +202,12 @@ src_vector(struct i915_fp_compile *p, switch (sem_name) { case TGSI_SEMANTIC_POSITION: - debug_printf("SKIP SEM POS\n"); - /* - assert(p->wpos_tex != -1); - src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); - */ - break; + { + /* for fragcoord */ + int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS); + src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); + break; + } case TGSI_SEMANTIC_COLOR: if (sem_ind == 0) { src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); @@ -212,6 +229,13 @@ src_vector(struct i915_fp_compile *p, src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); break; } + case TGSI_SEMANTIC_FACE: + { + /* for back/front faces */ + int real_tex_unit = get_mapping(fs, I915_SEMANTIC_FACE); + src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X); + break; + } default: i915_program_error(p, "Bad source->Index"); return 0; @@ -237,7 +261,6 @@ src_vector(struct i915_fp_compile *p, source->Register.SwizzleZ, source->Register.SwizzleW); - /* There's both negate-all-components and per-component negation. * Try to handle both here. */ @@ -252,6 +275,9 @@ src_vector(struct i915_fp_compile *p, /* XXX enable these assertions, or fix things */ assert(!source->Register.Absolute); #endif + if (source->Register.Absolute) + debug_printf("Unhandled absolute value\n"); + return src; } @@ -419,11 +445,6 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, emit_simple_arith(p, &inst2, opcode, numArgs, fs); } - -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - /* * Translate TGSI instruction to i915 instruction. * @@ -477,13 +498,6 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); - /* By choosing different taylor constants, could get rid of this mul: - */ - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); - /* * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 @@ -516,6 +530,18 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_const4fv(p, cos_constants), 0); break; + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: + /* XXX We just output 0 here */ + debug_printf("Punting DDX/DDX\n"); + src0 = get_result_vector(p, &inst->Dst[0]); + i915_emit_arith(p, + A0_MOV, + get_result_vector(p, &inst->Dst[0]), + get_result_flags(inst), 0, + swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0); + break; + case TGSI_OPCODE_DP2: src0 = src_vector(p, &inst->Src[0], fs); src1 = src_vector(p, &inst->Src[1], fs); @@ -754,9 +780,9 @@ i915_translate_instruction(struct i915_fp_compile *p, * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x - * scs.x = DP4 t1, sin_constants + * scs.x = DP4 t1, scs_sin_constants * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 - * scs.y = DP4 t1, cos_constants + * scs.y = DP4 t1, scs_cos_constants */ i915_emit_arith(p, A0_MUL, @@ -791,7 +817,7 @@ i915_translate_instruction(struct i915_fp_compile *p, get_result_vector(p, &inst->Dst[0]), A0_DEST_CHANNEL_Y, 0, swizzle(tmp1, W, Z, Y, X), - i915_emit_const4fv(p, sin_constants), 0); + i915_emit_const4fv(p, scs_sin_constants), 0); } if (writemask & TGSI_WRITEMASK_X) { @@ -806,7 +832,7 @@ i915_translate_instruction(struct i915_fp_compile *p, get_result_vector(p, &inst->Dst[0]), A0_DEST_CHANNEL_X, 0, swizzle(tmp, ONE, Z, Y, X), - i915_emit_const4fv(p, cos_constants), 0); + i915_emit_const4fv(p, scs_cos_constants), 0); } break; @@ -853,13 +879,6 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); - /* By choosing different taylor constants, could get rid of this mul: - */ - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); - /* * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x @@ -907,7 +926,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_SNE: - /* if we're neither < nor > then we're != */ + /* if we're < or > then we're != */ src0 = src_vector(p, &inst->Src[0], fs); src1 = src_vector(p, &inst->Src[1], fs); tmp = i915_get_utemp(p); @@ -1070,9 +1089,11 @@ i915_translate_instructions(struct i915_fp_compile *p, for (i = parse.FullToken.FullDeclaration.Range.First; i <= parse.FullToken.FullDeclaration.Range.Last; i++) { - assert(i < I915_MAX_TEMPORARY); - /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ - p->temp_flag |= (1 << i); /* mark temp as used */ + if (i >= I915_MAX_TEMPORARY) + debug_printf("Too many temps (%d)\n",i); + else + /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ + p->temp_flag |= (1 << i); /* mark temp as used */ } } break; @@ -1144,6 +1165,8 @@ i915_init_compile(struct i915_context *i915, ifs->num_constants = 0; memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); + memset(&p->register_phases, 0, sizeof(p->register_phases)); + for (i = 0; i < I915_TEX_UNITS; i++) ifs->generic_mapping[i] = -1; @@ -1161,8 +1184,6 @@ i915_init_compile(struct i915_context *i915, p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; p->utemp_flag = ~0x7; - p->wpos_tex = -1; - /* initialize the first program word */ *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; @@ -1181,7 +1202,7 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) unsigned long decl_size = (unsigned long) (p->decl - p->declarations); if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) - i915_program_error(p, "Exceeded max nr indirect texture lookups"); + debug_printf("Exceeded max nr indirect texture lookups\n"); if (p->nr_tex_insn > I915_MAX_TEX_INSN) i915_program_error(p, "Exceeded max TEX instructions"); @@ -1234,40 +1255,6 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) } -/** - * Find an unused texture coordinate slot to use for fragment WPOS. - * Update p->fp->wpos_tex with the result (-1 if no used texcoord slot is found). - */ -static void -i915_find_wpos_space(struct i915_fp_compile *p) -{ -#if 0 - const uint inputs - = p->shader->inputs_read | (1 << TGSI_ATTRIB_POS); /*XXX hack*/ - uint i; - - p->wpos_tex = -1; - - if (inputs & (1 << TGSI_ATTRIB_POS)) { - for (i = 0; i < I915_TEX_UNITS; i++) { - if ((inputs & (1 << (TGSI_ATTRIB_TEX0 + i))) == 0) { - p->wpos_tex = i; - return; - } - } - - i915_program_error(p, "No free texcoord for wpos value"); - } -#else - if (p->shader->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { - /* frag shader using the fragment position input */ -#if 0 - assert(0); -#endif - } -#endif -} - @@ -1314,7 +1301,6 @@ i915_translate_fragment_program( struct i915_context *i915, } p = i915_init_compile(i915, fs); - i915_find_wpos_space(p); i915_translate_instructions(p, tokens, fs); i915_fixup_depth_write(p); diff --git a/src/gallium/drivers/i915/i915_query.c b/src/gallium/drivers/i915/i915_query.c new file mode 100644 index 00000000000..c886df74bad --- /dev/null +++ b/src/gallium/drivers/i915/i915_query.c @@ -0,0 +1,86 @@ +/************************************************************************** + * + * Copyright 2011 The Chromium OS authors. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Fake occlusion queries which return 0, it's better than crashing */ + +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" + +#include "i915_context.h" +#include "i915_query.h" + +struct i915_query +{ + unsigned query; +}; + +static struct pipe_query *i915_create_query(struct pipe_context *ctx, + unsigned query_type) +{ + struct i915_query *query = CALLOC_STRUCT( i915_query ); + + return (struct pipe_query *)query; +} + +static void i915_destroy_query(struct pipe_context *ctx, + struct pipe_query *query) +{ + FREE(query); +} + +static void i915_begin_query(struct pipe_context *ctx, + struct pipe_query *query) +{ +} + +static void i915_end_query(struct pipe_context *ctx, struct pipe_query *query) +{ +} + +static boolean i915_get_query_result(struct pipe_context *ctx, + struct pipe_query *query, + boolean wait, + void *vresult) +{ + uint64_t *result = (uint64_t*)vresult; + + /* 2* viewport Max */ + *result = 512*1024*1024; + return TRUE; +} + +void +i915_init_query_functions(struct i915_context *i915) +{ + i915->base.create_query = i915_create_query; + i915->base.destroy_query = i915_destroy_query; + i915->base.begin_query = i915_begin_query; + i915->base.end_query = i915_end_query; + i915->base.get_query_result = i915_get_query_result; +} + diff --git a/src/gallium/drivers/i915/i915_query.h b/src/gallium/drivers/i915/i915_query.h new file mode 100644 index 00000000000..2c689ea6b1c --- /dev/null +++ b/src/gallium/drivers/i915/i915_query.h @@ -0,0 +1,36 @@ +/************************************************************************** + * + * Copyright 2011 The Chromium OS authors. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_QUERY_H +#define I915_QUERY_H + +struct i915_context; +struct pipe_context; + +void i915_init_query_functions( struct i915_context *i915 ); + +#endif /* I915_QUERY_H */ diff --git a/src/gallium/drivers/i915/i915_reg.h b/src/gallium/drivers/i915/i915_reg.h index 6fe032cdb6e..14e786d0f2a 100644 --- a/src/gallium/drivers/i915/i915_reg.h +++ b/src/gallium/drivers/i915/i915_reg.h @@ -170,6 +170,13 @@ #define COLOR_BUF_RGB555 (1<<8) #define COLOR_BUF_RGB565 (2<<8) #define COLOR_BUF_ARGB8888 (3<<8) +#define COLOR_BUF_YCRCB_SWAP (4<<8) +#define COLOR_BUF_YCRCB_NORMAL (5<<8) +#define COLOR_BUF_YCRCB_SWAPUV (6<<8) +#define COLOR_BUF_YCRCB_SWAPUVY (7<<8) +#define COLOR_BUF_ARGB4444 (8<<8) +#define COLOR_BUF_ARGB1555 (9<<8) +#define COLOR_BUF_ARGB2101010 (10<<8) #define DEPTH_FRMT_16_FIXED 0 #define DEPTH_FRMT_16_FLOAT (1<<2) #define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2) diff --git a/src/gallium/drivers/i915/i915_resource.c b/src/gallium/drivers/i915/i915_resource.c index 7f52ba11d61..b4719af1fb6 100644 --- a/src/gallium/drivers/i915/i915_resource.c +++ b/src/gallium/drivers/i915/i915_resource.c @@ -7,12 +7,12 @@ static struct pipe_resource * i915_resource_create(struct pipe_screen *screen, - const struct pipe_resource *template) + const struct pipe_resource *template) { if (template->target == PIPE_BUFFER) return i915_buffer_create(screen, template); else - return i915_texture_create(screen, template); + return i915_texture_create(screen, template, FALSE); } diff --git a/src/gallium/drivers/i915/i915_resource.h b/src/gallium/drivers/i915/i915_resource.h index c15ecdfc22a..14eed2c4a79 100644 --- a/src/gallium/drivers/i915/i915_resource.h +++ b/src/gallium/drivers/i915/i915_resource.h @@ -45,6 +45,15 @@ struct i915_buffer { boolean free_on_destroy; }; + +/* Texture transfer. */ +struct i915_transfer { + /* Base class. */ + struct pipe_transfer b; + struct pipe_resource *staging_texture; +}; + + #define I915_MAX_TEXTURE_2D_LEVELS 12 /* max 2048x2048 */ #define I915_MAX_TEXTURE_3D_LEVELS 9 /* max 256x256x256 */ @@ -101,7 +110,8 @@ static INLINE struct i915_buffer *i915_buffer(struct pipe_resource *resource) struct pipe_resource * i915_texture_create(struct pipe_screen *screen, - const struct pipe_resource *template); + const struct pipe_resource *template, + boolean force_untiled); struct pipe_resource * i915_texture_from_handle(struct pipe_screen * screen, diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index b74b19d0fe4..0b6424f8d16 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -37,6 +37,7 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_rect.h" #include "i915_context.h" #include "i915_resource.h" @@ -710,7 +711,7 @@ i915_texture_destroy(struct pipe_screen *screen, FREE(tex); } -static struct pipe_transfer * +static struct pipe_transfer * i915_texture_get_transfer(struct pipe_context *pipe, struct pipe_resource *resource, unsigned level, @@ -719,19 +720,45 @@ i915_texture_get_transfer(struct pipe_context *pipe, { struct i915_context *i915 = i915_context(pipe); struct i915_texture *tex = i915_texture(resource); - struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool); + struct i915_transfer *transfer = util_slab_alloc(&i915->texture_transfer_pool); + boolean use_staging_texture = FALSE; if (transfer == NULL) return NULL; - transfer->resource = resource; - transfer->level = level; - transfer->usage = usage; - transfer->box = *box; - transfer->stride = tex->stride; - /* FIXME: layer_stride */ + transfer->b.resource = resource; + transfer->b.level = level; + transfer->b.usage = usage; + transfer->b.box = *box; + transfer->b.stride = tex->stride; + transfer->staging_texture = NULL; + /* XXX: handle depth textures everyhwere*/ + transfer->b.layer_stride = 0; + transfer->b.data = NULL; + + /* if we use staging transfers, only support textures we can render to, + * because we need that for u_blitter */ + if (i915->blitter && + i915_is_format_supported(NULL, /* screen */ + transfer->b.resource->format, + 0, /* target */ + 1, /* sample count */ + PIPE_BIND_RENDER_TARGET) && + (usage & PIPE_TRANSFER_WRITE) && + !(usage & (PIPE_TRANSFER_READ | PIPE_TRANSFER_DONTBLOCK | PIPE_TRANSFER_UNSYNCHRONIZED))) + use_staging_texture = TRUE; + + use_staging_texture = FALSE; + + if (use_staging_texture) { + /* + * Allocate the untiled staging texture. + * If the alloc fails, transfer->staging_texture is NULL and we fallback to a map() + */ + transfer->staging_texture = i915_texture_create(pipe->screen, resource, TRUE); + } - return transfer; + return (struct pipe_transfer*)transfer; } static void @@ -739,17 +766,33 @@ i915_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct i915_context *i915 = i915_context(pipe); - util_slab_free(&i915->transfer_pool, transfer); + struct i915_transfer *itransfer = (struct i915_transfer*)transfer; + + if ((itransfer->staging_texture) && + (transfer->usage & PIPE_TRANSFER_WRITE)) { + struct pipe_box sbox; + + u_box_origin_2d(itransfer->b.box.width, itransfer->b.box.height, &sbox); + pipe->resource_copy_region(pipe, itransfer->b.resource, itransfer->b.level, + itransfer->b.box.x, itransfer->b.box.y, itransfer->b.box.z, + itransfer->staging_texture, + 0, &sbox); + pipe->flush(pipe, NULL); + pipe_resource_reference(&itransfer->staging_texture, NULL); + } + + util_slab_free(&i915->texture_transfer_pool, itransfer); } static void * i915_texture_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct pipe_resource *resource = transfer->resource; - struct i915_texture *tex = i915_texture(resource); + struct i915_transfer *itransfer = (struct i915_transfer*)transfer; + struct pipe_resource *resource = itransfer->b.resource; + struct i915_texture *tex = NULL; struct i915_winsys *iws = i915_screen(pipe->screen)->iws; - struct pipe_box *box = &transfer->box; + struct pipe_box *box = &itransfer->b.box; enum pipe_format format = resource->format; unsigned offset; char *map; @@ -757,18 +800,25 @@ i915_texture_transfer_map(struct pipe_context *pipe, if (resource->target != PIPE_TEXTURE_3D && resource->target != PIPE_TEXTURE_CUBE) assert(box->z == 0); - offset = i915_texture_offset(tex, transfer->level, box->z); - /* TODO this is a sledgehammer */ - pipe->flush(pipe, NULL); + if (itransfer->staging_texture) { + tex = i915_texture(itransfer->staging_texture); + } else { + /* TODO this is a sledgehammer */ + tex = i915_texture(resource); + pipe->flush(pipe, NULL); + } + + offset = i915_texture_offset(tex, itransfer->b.level, box->z); map = iws->buffer_map(iws, tex->buffer, - (transfer->usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE); - if (map == NULL) + (itransfer->b.usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE); + if (map == NULL) { return NULL; + } return map + offset + - box->y / util_format_get_blockheight(format) * transfer->stride + + box->y / util_format_get_blockheight(format) * itransfer->b.stride + box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); } @@ -776,14 +826,106 @@ static void i915_texture_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct i915_texture *tex = i915_texture(transfer->resource); + struct i915_transfer *itransfer = (struct i915_transfer*)transfer; + struct i915_texture *tex = i915_texture(itransfer->b.resource); struct i915_winsys *iws = i915_screen(tex->b.b.screen)->iws; + + if (itransfer->staging_texture) + tex = i915_texture(itransfer->staging_texture); + iws->buffer_unmap(iws, tex->buffer); } +static void i915_transfer_inline_write( struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) +{ + struct pipe_transfer *transfer = NULL; + struct i915_transfer *itransfer = NULL; + const uint8_t *src_data = data; + unsigned i; + + transfer = pipe->get_transfer(pipe, + resource, + level, + usage, + box ); + if (transfer == NULL) + goto out; + + itransfer = (struct i915_transfer*)transfer; + + if (itransfer->staging_texture) { + struct i915_texture *tex = i915_texture(itransfer->staging_texture); + enum pipe_format format = tex->b.b.format; + struct i915_winsys *iws = i915_screen(tex->b.b.screen)->iws; + size_t offset; + size_t size; + + offset = i915_texture_offset(tex, transfer->level, transfer->box.z); + + for (i = 0; i < box->depth; i++) { + if (!tex->b.b.last_level && + tex->b.b.width0 == transfer->box.width) { + unsigned nby = util_format_get_nblocksy(format, transfer->box.y); + assert(!offset); + assert(!transfer->box.x); + assert(tex->stride == transfer->stride); + + offset += tex->stride * nby; + size = util_format_get_2d_size(format, transfer->stride, + transfer->box.height); + iws->buffer_write(iws, tex->buffer, offset, size, transfer->data); + + } else { + unsigned nby = util_format_get_nblocksy(format, transfer->box.y); + int i; + offset += util_format_get_stride(format, transfer->box.x); + size = transfer->stride; + + for (i = 0; i < nby; i++) { + iws->buffer_write(iws, tex->buffer, offset, size, transfer->data); + offset += tex->stride; + } + } + offset += layer_stride; + } + } else { + uint8_t *map = pipe_transfer_map(pipe, &itransfer->b); + if (map == NULL) + goto nomap; + + for (i = 0; i < box->depth; i++) { + util_copy_rect(map, + resource->format, + itransfer->b.stride, /* bytes */ + 0, 0, + box->width, + box->height, + src_data, + stride, /* bytes */ + 0, 0); + map += itransfer->b.layer_stride; + src_data += layer_stride; + } +nomap: + if (map) + pipe_transfer_unmap(pipe, &itransfer->b); + } + +out: + if (itransfer) + pipe_transfer_destroy(pipe, &itransfer->b); +} -struct u_resource_vtbl i915_texture_vtbl = + +struct u_resource_vtbl i915_texture_vtbl = { i915_texture_get_handle, /* get_handle */ i915_texture_destroy, /* resource_destroy */ @@ -792,7 +934,7 @@ struct u_resource_vtbl i915_texture_vtbl = i915_texture_transfer_map, /* transfer_map */ u_default_transfer_flush_region, /* transfer_flush_region */ i915_texture_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ + i915_transfer_inline_write /* transfer_inline_write */ }; @@ -800,7 +942,8 @@ struct u_resource_vtbl i915_texture_vtbl = struct pipe_resource * i915_texture_create(struct pipe_screen *screen, - const struct pipe_resource *template) + const struct pipe_resource *template, + boolean force_untiled) { struct i915_screen *is = i915_screen(screen); struct i915_winsys *iws = is->iws; @@ -815,7 +958,10 @@ i915_texture_create(struct pipe_screen *screen, pipe_reference_init(&tex->b.b.reference, 1); tex->b.b.screen = screen; - tex->tiling = i915_texture_tiling(is, tex); + if (force_untiled) + tex->tiling = I915_TILE_NONE; + else + tex->tiling = i915_texture_tiling(is, tex); if (is->is_i945) { if (!i945_texture_layout(tex)) @@ -836,7 +982,7 @@ i915_texture_create(struct pipe_screen *screen, buf_usage = I915_NEW_TEXTURE; tex->buffer = iws->buffer_create_tiled(iws, &tex->stride, tex->total_nblocksy, - &tex->tiling, buf_usage); + &tex->tiling, buf_usage); if (!tex->buffer) goto fail; diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index c86baa58b28..e743f6031eb 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -109,17 +109,17 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_POINT_SPRITE: case PIPE_CAP_PRIMITIVE_RESTART: /* draw module */ case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: return 1; /* Features that should be supported (boolean caps). */ /* XXX: Just test the code */ case PIPE_CAP_BLEND_EQUATION_SEPARATE: - /* XXX: No code but hw supports it */ - case PIPE_CAP_POINT_SPRITE: /* Also lie about these when asked to (needed for GLSL / GL 2.0) */ return is->debug.lie ? 1 : 0; @@ -129,7 +129,6 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_TGSI_INSTANCEID: - case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_SWIZZLE: @@ -254,7 +253,7 @@ i915_get_paramf(struct pipe_screen *screen, enum pipe_cap cap) } } -static boolean +boolean i915_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, @@ -264,7 +263,10 @@ i915_is_format_supported(struct pipe_screen *screen, static const enum pipe_format tex_supported[] = { PIPE_FORMAT_B8G8R8A8_UNORM, PIPE_FORMAT_B8G8R8X8_UNORM, + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_R8G8B8X8_UNORM, PIPE_FORMAT_B5G6R5_UNORM, + PIPE_FORMAT_B10G10R10A2_UNORM, PIPE_FORMAT_L8_UNORM, PIPE_FORMAT_A8_UNORM, PIPE_FORMAT_I8_UNORM, @@ -283,7 +285,12 @@ i915_is_format_supported(struct pipe_screen *screen, }; static const enum pipe_format render_supported[] = { PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_B5G6R5_UNORM, + PIPE_FORMAT_B10G10R10A2_UNORM, + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_I8_UNORM, PIPE_FORMAT_NONE /* list terminator */ }; static const enum pipe_format depth_supported[] = { diff --git a/src/gallium/drivers/i915/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h index cfc585b5350..9f2004eb942 100644 --- a/src/gallium/drivers/i915/i915_screen.h +++ b/src/gallium/drivers/i915/i915_screen.h @@ -65,5 +65,11 @@ i915_screen(struct pipe_screen *pscreen) return (struct i915_screen *) pscreen; } +boolean +i915_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned tex_usage); #endif /* I915_SCREEN_H */ diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 1b57c5776f2..f412626955d 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -146,6 +146,7 @@ i915_create_blend_state(struct pipe_context *pipe, if (blend->dither) cso_data->LIS5 |= S5_COLOR_DITHER_ENABLE; + /* XXX here take the target fixup into account */ if ((blend->rt[0].colormask & PIPE_MASK_R) == 0) cso_data->LIS5 |= S5_WRITEDISABLE_RED; @@ -246,7 +247,7 @@ i915_create_sampler_state(struct pipe_context *pipe, if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { cso->state[0] |= (SS2_SHADOW_ENABLE | - i915_translate_compare_func(sampler->compare_func)); + i915_translate_shadow_compare_func(sampler->compare_func)); minFilt = FILTER_4X4_FLAT; magFilt = FILTER_4X4_FLAT; @@ -466,6 +467,7 @@ i915_create_fs_state(struct pipe_context *pipe, if (!ifs) return NULL; + ifs->draw_data = draw_create_fragment_shader(i915->draw, templ); ifs->state.tokens = tgsi_dup_tokens(templ->tokens); tgsi_scan_shader(templ->tokens, &ifs->info); @@ -495,6 +497,8 @@ i915_bind_fs_state(struct pipe_context *pipe, void *shader) i915->fs = (struct i915_fragment_shader*) shader; + draw_bind_fragment_shader(i915->draw, (i915->fs ? i915->fs->draw_data : NULL)); + i915->dirty |= I915_NEW_FS; } @@ -503,12 +507,14 @@ void i915_delete_fs_state(struct pipe_context *pipe, void *shader) { struct i915_fragment_shader *ifs = (struct i915_fragment_shader *) shader; - if (ifs->program) + if (ifs->program) { FREE(ifs->program); + ifs->program = NULL; + FREE((struct tgsi_token *)ifs->state.tokens); + ifs->state.tokens = NULL; + } ifs->program_len = 0; - FREE((struct tgsi_token *)ifs->state.tokens); - FREE(ifs); } diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index bf6b30a4530..e01f16e715c 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -33,9 +33,10 @@ #include "i915_context.h" #include "i915_state.h" #include "i915_debug.h" +#include "i915_fpc.h" #include "i915_reg.h" -static uint find_mapping(struct i915_fragment_shader* fs, int unit) +static uint find_mapping(const struct i915_fragment_shader* fs, int unit) { int i; for (i = 0; i < I915_TEX_UNITS ; i++) @@ -58,12 +59,12 @@ static void calculate_vertex_layout(struct i915_context *i915) const struct i915_fragment_shader *fs = i915->fs; const enum interp_mode colorInterp = i915->rasterizer->color_interp; struct vertex_info vinfo; - boolean texCoords[I915_TEX_UNITS], colors[2], fog, needW; + boolean texCoords[I915_TEX_UNITS], colors[2], fog, needW, face; uint i; int src; memset(texCoords, 0, sizeof(texCoords)); - colors[0] = colors[1] = fog = needW = FALSE; + colors[0] = colors[1] = fog = needW = face = FALSE; memset(&vinfo, 0, sizeof(vinfo)); /* Determine which fragment program inputs are needed. Setup HW vertex @@ -72,6 +73,10 @@ static void calculate_vertex_layout(struct i915_context *i915) for (i = 0; i < fs->info.num_inputs; i++) { switch (fs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: + { + uint unit = I915_SEMANTIC_POS; + texCoords[find_mapping(fs, unit)] = TRUE; + } break; case TGSI_SEMANTIC_COLOR: assert(fs->info.input_semantic_index[i] < 2); @@ -80,7 +85,6 @@ static void calculate_vertex_layout(struct i915_context *i915) case TGSI_SEMANTIC_GENERIC: { /* texcoords/varyings/other generic */ - /* XXX handle back/front face and point size */ uint unit = fs->info.input_semantic_index[i]; texCoords[find_mapping(fs, unit)] = TRUE; @@ -90,7 +94,11 @@ static void calculate_vertex_layout(struct i915_context *i915) case TGSI_SEMANTIC_FOG: fog = TRUE; break; + case TGSI_SEMANTIC_FACE: + face = TRUE; + break; default: + debug_printf("Unknown input type %d\n", fs->info.input_semantic_name[i]); assert(0); } } @@ -147,6 +155,20 @@ static void calculate_vertex_layout(struct i915_context *i915) vinfo.hwfmt[1] |= hwtc << (i * 4); } + /* front/back face */ + if (face) { + uint slot = find_mapping(fs, I915_SEMANTIC_FACE); + debug_printf("Front/back face is broken\n"); + /* XXX Because of limitations in the draw module, currently src will be 0 + * for SEMANTIC_FACE, so this aliases to POS. We need to fix in the draw + * module by adding an extra shader output. + */ + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FACE, 0); + draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_CONSTANT, src); + vinfo.hwfmt[1] &= ~(TEXCOORDFMT_NOT_PRESENT << (slot * 4)); + vinfo.hwfmt[1] |= TEXCOORDFMT_1D << (slot * 4); + } + draw_compute_vertex_size(&vinfo); if (memcmp(&i915->current.vertex_info, &vinfo, sizeof(vinfo))) { diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 0155cd83510..39fb13aec7e 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -34,7 +34,9 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -128,7 +130,7 @@ validate_immediate(struct i915_context *i915, unsigned *batch_space) static void emit_immediate(struct i915_context *i915) { - /* remove unwatned bits and S7 */ + /* remove unwanted bits and S7 */ unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | @@ -341,6 +343,59 @@ emit_constants(struct i915_context *i915) } } +static const struct +{ + enum pipe_format format; + uint hw_shift_R; + uint hw_shift_G; + uint hw_shift_B; + uint hw_shift_A; +} fixup_formats[] = { + { PIPE_FORMAT_R8G8B8A8_UNORM, 20, 24, 28, 16 /* BGRA */}, + { PIPE_FORMAT_L8_UNORM, 28, 28, 28, 16 /* RRRA */}, + { PIPE_FORMAT_I8_UNORM, 28, 28, 28, 16 /* RRRA */}, + { PIPE_FORMAT_A8_UNORM, 16, 16, 16, 16 /* AAAA */}, + { PIPE_FORMAT_NONE, 0, 0, 0, 0}, +}; + +static boolean need_fixup(struct pipe_surface* p) +{ + enum pipe_format f; + + /* if we don't have a surface bound yet, we don't need to fixup the shader */ + if (!p) + return FALSE; + + f = p->format; + for(int i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++) + if (fixup_formats[i].format == f) + return TRUE; + + return FALSE; +} + +static uint fixup_swizzle(enum pipe_format f, uint v) +{ + int i; + + for(i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++) + if (fixup_formats[i].format == f) + break; + + if (fixup_formats[i].format == PIPE_FORMAT_NONE) + return v; + + uint rgba = v & 0xFFFF0000; + + v &= 0xFFFF; + v |= ((rgba >> fixup_formats[i].hw_shift_R) & 0xF) << 28; + v |= ((rgba >> fixup_formats[i].hw_shift_G) & 0xF) << 24; + v |= ((rgba >> fixup_formats[i].hw_shift_B) & 0xF) << 20; + v |= ((rgba >> fixup_formats[i].hw_shift_A) & 0xF) << 16; + + return v; +} + static void validate_program(struct i915_context *i915, unsigned *batch_space) { @@ -350,12 +405,39 @@ validate_program(struct i915_context *i915, unsigned *batch_space) static void emit_program(struct i915_context *i915) { - uint i; - /* we should always have, at least, a pass-through program */ - assert(i915->fs->program_len > 0); - for (i = 0; i < i915->fs->program_len; i++) { - OUT_BATCH(i915->fs->program[i]); + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + boolean need_format_fixup = need_fixup(cbuf_surface); + int i; + int fixup_offset = -1; + + /* we should always have, at least, a pass-through program */ + assert(i915->fs->program_len > 0); + + if (need_format_fixup) { + /* Find where we emit the output color */ + for (i = i915->fs->program_len - 3; i>0; i-=3) { + uint instr = i915->fs->program[i]; + if ((instr & (REG_NR_MASK << A0_DEST_TYPE_SHIFT)) == + (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) ) { + /* Found it! */ + fixup_offset = i + 1; + break; + } + } + if (fixup_offset == -1) { + need_format_fixup = FALSE; + debug_printf("couldn't find fixup offset\n"); } + } + + /* emit the program to the hw */ + for (i = 0; i < i915->fs->program_len; i++) { + if (need_format_fixup && (i == fixup_offset) ) { + uint v = fixup_swizzle(cbuf_surface->format, i915->fs->program[i]); + OUT_BATCH(v); + } else + OUT_BATCH(i915->fs->program[i]); + } } static void diff --git a/src/gallium/drivers/i915/i915_state_inlines.h b/src/gallium/drivers/i915/i915_state_inlines.h index b589117fbfe..aa992f75c51 100644 --- a/src/gallium/drivers/i915/i915_state_inlines.h +++ b/src/gallium/drivers/i915/i915_state_inlines.h @@ -60,6 +60,31 @@ i915_translate_compare_func(unsigned func) } static INLINE unsigned +i915_translate_shadow_compare_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: + return COMPAREFUNC_ALWAYS; + case PIPE_FUNC_LESS: + return COMPAREFUNC_LEQUAL; + case PIPE_FUNC_LEQUAL: + return COMPAREFUNC_LESS; + case PIPE_FUNC_GREATER: + return COMPAREFUNC_GEQUAL; + case PIPE_FUNC_GEQUAL: + return COMPAREFUNC_GREATER; + case PIPE_FUNC_NOTEQUAL: + return COMPAREFUNC_EQUAL; + case PIPE_FUNC_EQUAL: + return COMPAREFUNC_NOTEQUAL; + case PIPE_FUNC_ALWAYS: + return COMPAREFUNC_NEVER; + default: + return COMPAREFUNC_NEVER; + } +} + +static INLINE unsigned i915_translate_stencil_op(unsigned op) { switch (op) { diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c index be70e7a92c9..0103f7c3530 100644 --- a/src/gallium/drivers/i915/i915_state_sampler.c +++ b/src/gallium/drivers/i915/i915_state_sampler.c @@ -62,6 +62,7 @@ static void update_map(struct i915_context *i915, uint unit, const struct i915_texture *tex, const struct i915_sampler_state *sampler, + const struct pipe_sampler_view* view, uint state[2]); @@ -161,9 +162,10 @@ static void update_samplers(struct i915_context *i915) i915->current.sampler[unit]); /* the result */ update_map(i915, unit, - texture, /* texture */ - i915->sampler[unit], /* sampler state */ - i915->current.texbuffer[unit]); /* the result */ + texture, /* texture */ + i915->sampler[unit], /* sampler state */ + i915->fragment_sampler_views[unit], /* sampler view */ + i915->current.texbuffer[unit]); /* the result */ i915->current.sampler_enable_nr++; i915->current.sampler_enable_flags |= (1 << unit); @@ -180,13 +182,21 @@ struct i915_tracked_state i915_hw_samplers = { }; - /*********************************************************************** * Sampler views */ -static uint translate_texture_format(enum pipe_format pipeFormat) +static uint translate_texture_format(enum pipe_format pipeFormat, + const struct pipe_sampler_view* view) { + if ( (view->swizzle_r != PIPE_SWIZZLE_RED || + view->swizzle_g != PIPE_SWIZZLE_GREEN || + view->swizzle_b != PIPE_SWIZZLE_BLUE || + view->swizzle_a != PIPE_SWIZZLE_ALPHA ) && + pipeFormat != PIPE_FORMAT_Z24_UNORM_S8_USCALED && + pipeFormat != PIPE_FORMAT_Z24X8_UNORM ) + debug_printf("i915: unsupported texture swizzle for format %d\n", pipeFormat); + switch (pipeFormat) { case PIPE_FORMAT_L8_UNORM: return MAPSURF_8BIT | MT_8BIT_L8; @@ -202,16 +212,16 @@ static uint translate_texture_format(enum pipe_format pipeFormat) return MAPSURF_16BIT | MT_16BIT_ARGB1555; case PIPE_FORMAT_B4G4R4A4_UNORM: return MAPSURF_16BIT | MT_16BIT_ARGB4444; + case PIPE_FORMAT_B10G10R10A2_UNORM: + return MAPSURF_32BIT | MT_32BIT_ARGB2101010; case PIPE_FORMAT_B8G8R8A8_UNORM: return MAPSURF_32BIT | MT_32BIT_ARGB8888; case PIPE_FORMAT_B8G8R8X8_UNORM: return MAPSURF_32BIT | MT_32BIT_XRGB8888; case PIPE_FORMAT_R8G8B8A8_UNORM: return MAPSURF_32BIT | MT_32BIT_ABGR8888; -#if 0 case PIPE_FORMAT_R8G8B8X8_UNORM: return MAPSURF_32BIT | MT_32BIT_XBGR8888; -#endif case PIPE_FORMAT_YUYV: return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case PIPE_FORMAT_UYVY: @@ -232,7 +242,25 @@ static uint translate_texture_format(enum pipe_format pipeFormat) return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); case PIPE_FORMAT_Z24_UNORM_S8_USCALED: case PIPE_FORMAT_Z24X8_UNORM: - return (MAPSURF_32BIT | MT_32BIT_xI824); + { + if ( view->swizzle_r == PIPE_SWIZZLE_RED && + view->swizzle_g == PIPE_SWIZZLE_RED && + view->swizzle_b == PIPE_SWIZZLE_RED && + view->swizzle_a == PIPE_SWIZZLE_ONE) + return (MAPSURF_32BIT | MT_32BIT_xA824); + if ( view->swizzle_r == PIPE_SWIZZLE_RED && + view->swizzle_g == PIPE_SWIZZLE_RED && + view->swizzle_b == PIPE_SWIZZLE_RED && + view->swizzle_a == PIPE_SWIZZLE_RED) + return (MAPSURF_32BIT | MT_32BIT_xI824); + if ( view->swizzle_r == PIPE_SWIZZLE_ZERO && + view->swizzle_g == PIPE_SWIZZLE_ZERO && + view->swizzle_b == PIPE_SWIZZLE_ZERO && + view->swizzle_a == PIPE_SWIZZLE_RED) + return (MAPSURF_32BIT | MT_32BIT_xL824); + debug_printf("i915: unsupported depth swizzle\n"); + return (MAPSURF_32BIT | MT_32BIT_xL824); + } default: debug_printf("i915: translate_texture_format() bad image format %x\n", pipeFormat); @@ -262,6 +290,7 @@ static void update_map(struct i915_context *i915, uint unit, const struct i915_texture *tex, const struct i915_sampler_state *sampler, + const struct pipe_sampler_view* view, uint state[2]) { const struct pipe_resource *pt = &tex->b.b; @@ -275,7 +304,7 @@ static void update_map(struct i915_context *i915, assert(height); assert(depth); - format = translate_texture_format(pt->format); + format = translate_texture_format(pt->format, view); pitch = tex->stride; assert(format); @@ -318,8 +347,9 @@ static void update_maps(struct i915_context *i915) update_map(i915, unit, - texture, /* texture */ - i915->sampler[unit], /* sampler state */ + texture, /* texture */ + i915->sampler[unit], /* sampler state */ + i915->fragment_sampler_views[unit], /* sampler view */ i915->current.texbuffer[unit]); } } diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c index 2865298318c..0e4000bc2ab 100644 --- a/src/gallium/drivers/i915/i915_state_static.c +++ b/src/gallium/drivers/i915/i915_state_static.c @@ -42,6 +42,18 @@ static unsigned translate_format(enum pipe_format format) return COLOR_BUF_ARGB8888; case PIPE_FORMAT_B5G6R5_UNORM: return COLOR_BUF_RGB565; + case PIPE_FORMAT_B5G5R5A1_UNORM: + return COLOR_BUF_ARGB1555; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return COLOR_BUF_ARGB8888; + case PIPE_FORMAT_B4G4R4A4_UNORM: + return COLOR_BUF_ARGB4444; + case PIPE_FORMAT_B10G10R10A2_UNORM: + return COLOR_BUF_ARGB2101010; + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + return COLOR_BUF_8BIT; default: assert(0); return 0; @@ -137,7 +149,8 @@ static void update_framebuffer(struct i915_context *i915) i915->static_dirty |= I915_DST_RECT; } - i915->hardware_dirty |= I915_HW_STATIC; + /* we also send a new program to make sure the fixup for RGBA surfaces happens */ + i915->hardware_dirty |= I915_HW_STATIC | I915_HW_PROGRAM; /* flush the cache in case we sample from the old renderbuffers */ i915_set_flush_dirty(i915, I915_FLUSH_CACHE); diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index c10a8cbc12c..d6b20ceb5ce 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -79,7 +79,7 @@ llvmpipe = env.ConvenienceLibrary( env.Alias('llvmpipe', llvmpipe) -if env['platform'] != 'embedded': +if not env['embedded']: env = env.Clone() env.Prepend(LIBS = [llvmpipe] + gallium) diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 036a6e0c379..4b2ae1436ea 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -423,7 +423,7 @@ llvmpipe_create_screen(struct sw_winsys *winsys) lp_jit_screen_init(screen); screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0; -#ifdef PIPE_OS_EMBEDDED +#ifdef PIPE_SUBSYSTEM_EMBEDDED screen->num_threads = 0; #endif screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads); diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 8df7b236fe0..f4324e69971 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -423,6 +423,70 @@ lp_tile_b8g8r8a8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src, } } +static void +lp_tile_b8g8r8x8_unorm_swizzle_4ub_sse2(uint8_t * restrict dst, + const uint8_t * restrict src, unsigned src_stride, + unsigned x0, unsigned y0) +{ + __m128i *dst128 = (__m128i *) dst; + unsigned x, y; + + src += y0 * src_stride; + src += x0 * sizeof(uint32_t); + + for (y = 0; y < TILE_SIZE; y += 4) { + const uint8_t *src_row = src; + + for (x = 0; x < TILE_SIZE; x += 4) { + swz4((const __m128i *) (src_row + 0 * src_stride), + (const __m128i *) (src_row + 1 * src_stride), + (const __m128i *) (src_row + 2 * src_stride), + (const __m128i *) (src_row + 3 * src_stride), + dst128 + 2, /* b */ + dst128 + 1, /* g */ + dst128 + 0, /* r */ + dst128 + 3); /* a */ + + dst128 += 4; + src_row += sizeof(__m128i); + } + + src += 4 * src_stride; + } +} + +static void +lp_tile_b8g8r8x8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src, + uint8_t * restrict dst, unsigned dst_stride, + unsigned x0, unsigned y0) +{ + unsigned int x, y; + const __m128i *src128 = (const __m128i *) src; + + dst += y0 * dst_stride; + dst += x0 * sizeof(uint32_t); + + for (y = 0; y < TILE_SIZE; y += 4) { + const uint8_t *dst_row = dst; + + for (x = 0; x < TILE_SIZE; x += 4) { + unswz4( &src128[2], /* b */ + &src128[1], /* g */ + &src128[0], /* r */ + &src128[3], /* a */ + (__m128i *) (dst_row + 0 * dst_stride), + (__m128i *) (dst_row + 1 * dst_stride), + (__m128i *) (dst_row + 2 * dst_stride), + (__m128i *) (dst_row + 3 * dst_stride)); + + src128 += 4; + dst_row += sizeof(__m128i);; + } + + dst += 4 * dst_stride; + } +} + #endif /* PIPE_ARCH_SSE */ ''' @@ -446,7 +510,7 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): if is_format_supported(format): print ' case %s:' % format.name func_name = 'lp_tile_%s_swizzle_%s' % (format.short_name(), dst_suffix) - if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM': + if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM' or format.name == 'PIPE_FORMAT_B8G8R8X8_UNORM': print '#ifdef PIPE_ARCH_SSE' print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name) print '#else' @@ -484,7 +548,7 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix): if is_format_supported(format): print ' case %s:' % format.name func_name = 'lp_tile_%s_unswizzle_%s' % (format.short_name(), src_suffix) - if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM': + if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM' or format.name == 'PIPE_FORMAT_B8G8R8X8_UNORM': print '#ifdef PIPE_ARCH_SSE' print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name) print '#else' diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index 401155bba6e..223e7682ccd 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -81,20 +81,6 @@ nouveau_screen_bo_new(struct pipe_screen *pscreen, unsigned alignment, return bo; } -struct nouveau_bo * -nouveau_screen_bo_user(struct pipe_screen *pscreen, void *ptr, unsigned bytes) -{ - struct nouveau_device *dev = nouveau_screen(pscreen)->device; - struct nouveau_bo *bo = NULL; - int ret; - - ret = nouveau_bo_user(dev, ptr, bytes, &bo); - if (ret) - return NULL; - - return bo; -} - void * nouveau_screen_bo_map(struct pipe_screen *pscreen, struct nouveau_bo *bo, diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index 186ada39677..d910809a0ec 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -47,8 +47,6 @@ nouveau_screen(struct pipe_screen *pscreen) struct nouveau_bo * nouveau_screen_bo_new(struct pipe_screen *pscreen, unsigned alignment, unsigned usage, unsigned bind, unsigned size); -struct nouveau_bo * -nouveau_screen_bo_user(struct pipe_screen *pscreen, void *ptr, unsigned bytes); void * nouveau_screen_bo_map(struct pipe_screen *pscreen, struct nouveau_bo *pb, diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 632ca4daf74..ceb83f6e684 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -168,6 +168,7 @@ nv50_bufctx_add_resident(struct nv50_context *nv50, int ctx, if (!resource->bo) return; + nv50->residents_size += sizeof(struct resident); /* We don't need to reference the resource here, it will be referenced * in the context/state, and bufctx will be reset when state changes. @@ -189,6 +190,7 @@ nv50_bufctx_del_resident(struct nv50_context *nv50, int ctx, top = util_dynarray_pop_ptr(&nv50->residents[ctx], struct resident); if (rsd != top) *rsd = *top; + nv50->residents_size -= sizeof(struct resident); break; } } @@ -201,11 +203,15 @@ nv50_bufctx_emit_relocs(struct nv50_context *nv50) struct util_dynarray *array; unsigned ctx, i, n; + n = nv50->residents_size / sizeof(struct resident); + n += NV50_SCREEN_RESIDENT_BO_COUNT; + + MARK_RING(nv50->screen->base.channel, n, n); + for (ctx = 0; ctx < NV50_BUFCTX_COUNT; ++ctx) { array = &nv50->residents[ctx]; n = array->size / sizeof(struct resident); - MARK_RING(nv50->screen->base.channel, n, n); for (i = 0; i < n; ++i) { rsd = util_dynarray_element(array, struct resident, i); diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 3f031994f0a..b4af24f6bce 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -64,6 +64,7 @@ struct nv50_context { struct nv50_screen *screen; struct util_dynarray residents[NV50_BUFCTX_COUNT]; + unsigned residents_size; uint32_t dirty; @@ -156,6 +157,7 @@ void nv50_bufctx_del_resident(struct nv50_context *, int ctx, static INLINE void nv50_bufctx_reset(struct nv50_context *nv50, int ctx) { + nv50->residents_size -= nv50->residents[ctx].size; util_dynarray_resize(&nv50->residents[ctx], 0); } diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index aea434b8679..64ad209a728 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -19,6 +19,8 @@ struct nv50_context; #define NV50_SCRATCH_SIZE (2 << 20) #define NV50_SCRATCH_NR_BUFFERS 2 +#define NV50_SCREEN_RESIDENT_BO_COUNT 5 + struct nv50_screen { struct nouveau_screen base; struct nouveau_winsys *nvws; diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c index 5d3f52c38c1..e5b10c37bef 100644 --- a/src/gallium/drivers/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nv50/nv50_shader_state.c @@ -215,10 +215,12 @@ void nv50_gmtyprog_validate(struct nv50_context *nv50) { struct nouveau_channel *chan = nv50->screen->base.channel; - struct nv50_program *gp = nv50->vertprog; + struct nv50_program *gp = nv50->gmtyprog; + if (!gp) /* GP_ENABLE is updated in linkage validation */ + return; if (!nv50_program_validate(nv50, gp)) - return; + return; BEGIN_RING(chan, RING_3D(GP_REG_ALLOC_TEMP), 1); OUT_RING (chan, gp->max_gpr); diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index abdb9ce2f93..bb08941c243 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -404,9 +404,6 @@ nv50_draw_arrays(struct nv50_context *nv50, struct nouveau_channel *chan = nv50->screen->base.channel; unsigned prim; - chan->flush_notify = nv50_draw_vbo_flush_notify; - chan->user_private = nv50; - prim = nv50_prim_gl(mode); while (instance_count--) { @@ -420,8 +417,6 @@ nv50_draw_arrays(struct nv50_context *nv50, prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } - - chan->flush_notify = nv50_default_flush_notify; } static void @@ -523,9 +518,6 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten, unsigned prim; const unsigned index_size = nv50->idxbuf.index_size; - chan->flush_notify = nv50_draw_vbo_flush_notify; - chan->user_private = nv50; - prim = nv50_prim_gl(mode); if (index_bias != nv50->state.index_bias) { @@ -631,8 +623,6 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten, prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } } - - chan->flush_notify = nv50_default_flush_notify; } void @@ -659,8 +649,12 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nv50_state_validate(nv50); + chan->flush_notify = nv50_draw_vbo_flush_notify; + chan->user_private = nv50; + if (nv50->vbo_fifo) { nv50_push_vbo(nv50, info); + chan->flush_notify = nv50_default_flush_notify; return; } @@ -712,6 +706,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start, info->count, info->instance_count, info->index_bias); } + chan->flush_notify = nv50_default_flush_notify; nv50_release_user_vbufs(nv50); } diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 2f2a3da7c44..2679b7f86aa 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -169,6 +169,7 @@ nvc0_bufctx_add_resident(struct nvc0_context *nvc0, int ctx, if (!resource->bo) return; + nvc0->residents_size += sizeof(struct resident); /* We don't need to reference the resource here, it will be referenced * in the context/state, and bufctx will be reset when state changes. @@ -190,6 +191,7 @@ nvc0_bufctx_del_resident(struct nvc0_context *nvc0, int ctx, top = util_dynarray_pop_ptr(&nvc0->residents[ctx], struct resident); if (rsd != top) *rsd = *top; + nvc0->residents_size -= sizeof(struct resident); break; } } @@ -202,11 +204,15 @@ nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0) struct util_dynarray *array; unsigned ctx, i, n; + n = nvc0->residents_size / sizeof(struct resident); + n += NVC0_SCREEN_RESIDENT_BO_COUNT; + + MARK_RING(nvc0->screen->base.channel, n, n); + for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) { array = &nvc0->residents[ctx]; n = array->size / sizeof(struct resident); - MARK_RING(nvc0->screen->base.channel, n, n); for (i = 0; i < n; ++i) { rsd = util_dynarray_element(array, struct resident, i); diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index f97141dd46e..b05cc337d5d 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -62,6 +62,7 @@ struct nvc0_context { struct nvc0_screen *screen; struct util_dynarray residents[NVC0_BUFCTX_COUNT]; + unsigned residents_size; uint32_t dirty; @@ -163,6 +164,7 @@ void nvc0_bufctx_del_resident(struct nvc0_context *, int ctx, static INLINE void nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx) { + nvc0->residents_size -= nvc0->residents[ctx].size; util_dynarray_resize(&nvc0->residents[ctx], 0); } diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 94bf0cf3481..015807e2f5d 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -16,6 +16,8 @@ struct nvc0_context; #define NVC0_SCRATCH_SIZE (2 << 20) #define NVC0_SCRATCH_NR_BUFFERS 2 +#define NVC0_SCREEN_RESIDENT_BO_COUNT 5 + struct nvc0_screen { struct nouveau_screen base; struct nouveau_winsys *nvws; diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index 6bbcf2447ec..41079104b39 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -382,9 +382,6 @@ nvc0_draw_arrays(struct nvc0_context *nvc0, struct nouveau_channel *chan = nvc0->screen->base.channel; unsigned prim; - chan->flush_notify = nvc0_draw_vbo_flush_notify; - chan->user_private = nvc0; - prim = nvc0_prim_gl(mode); while (instance_count--) { @@ -397,8 +394,6 @@ nvc0_draw_arrays(struct nvc0_context *nvc0, prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } - - chan->flush_notify = nvc0_default_flush_notify; } static void @@ -500,9 +495,6 @@ nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten, unsigned prim; const unsigned index_size = nvc0->idxbuf.index_size; - chan->flush_notify = nvc0_draw_vbo_flush_notify; - chan->user_private = nvc0; - prim = nvc0_prim_gl(mode); if (index_bias != nvc0->state.index_bias) { @@ -568,8 +560,6 @@ nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten, prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } } - - chan->flush_notify = nvc0_default_flush_notify; } void @@ -596,8 +586,12 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nvc0_state_validate(nvc0); + chan->flush_notify = nvc0_draw_vbo_flush_notify; + chan->user_private = nvc0; + if (nvc0->vbo_fifo) { nvc0_push_vbo(nvc0, info); + chan->flush_notify = nvc0_default_flush_notify; return; } @@ -648,6 +642,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start, info->count, info->instance_count, info->index_bias); } + chan->flush_notify = nvc0_default_flush_notify; nvc0_release_user_vbufs(nvc0); } diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index 2b1510264a1..98603bedde1 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -24,9 +24,21 @@ nvfx_flush(struct pipe_context *pipe, OUT_RING(chan, 1); }*/ - FIRE_RING(chan); - if (fence) + if (fence) { + /* horrific hack to make glFinish() work in the absence of + * having proper fences in nvfx. a pending rewrite will + * fix this properly, but may be a while off. + */ + MARK_RING(chan, 1, 1); + OUT_RELOC(chan, screen->fence, 0, NOUVEAU_BO_WR | + NOUVEAU_BO_DUMMY, 0, 0); + FIRE_RING(chan); + nouveau_bo_map(screen->fence, NOUVEAU_BO_RDWR); + nouveau_bo_unmap(screen->fence); *fence = NULL; + } else { + FIRE_RING(chan); + } } static void diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 78212029534..0140470d576 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -305,6 +305,7 @@ nvfx_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->eng3d); nvfx_screen_surface_takedown(pscreen); + nouveau_bo_ref(NULL, &screen->fence); nouveau_screen_fini(&screen->base); @@ -470,6 +471,12 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->context_create = nvfx_create; pscreen->video_context_create = nvfx_video_create; + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 4096, &screen->fence); + if (ret) { + nvfx_screen_destroy(pscreen); + return NULL; + } + switch (dev->chipset & 0xf0) { case 0x30: if (NV30_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) diff --git a/src/gallium/drivers/nvfx/nvfx_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h index b1f07187c78..02e7c5d1cad 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.h +++ b/src/gallium/drivers/nvfx/nvfx_screen.h @@ -11,6 +11,7 @@ struct nvfx_screen { struct nouveau_screen base; struct nouveau_winsys *nvws; + struct nouveau_bo *fence; struct nvfx_context *cur_ctx; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index b24e7faa644..b31141a518e 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -46,30 +46,26 @@ static uint32_t r300_translate_primitive(unsigned prim) { - switch (prim) { - case PIPE_PRIM_POINTS: - return R300_VAP_VF_CNTL__PRIM_POINTS; - case PIPE_PRIM_LINES: - return R300_VAP_VF_CNTL__PRIM_LINES; - case PIPE_PRIM_LINE_LOOP: - return R300_VAP_VF_CNTL__PRIM_LINE_LOOP; - case PIPE_PRIM_LINE_STRIP: - return R300_VAP_VF_CNTL__PRIM_LINE_STRIP; - case PIPE_PRIM_TRIANGLES: - return R300_VAP_VF_CNTL__PRIM_TRIANGLES; - case PIPE_PRIM_TRIANGLE_STRIP: - return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; - case PIPE_PRIM_TRIANGLE_FAN: - return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; - case PIPE_PRIM_QUADS: - return R300_VAP_VF_CNTL__PRIM_QUADS; - case PIPE_PRIM_QUAD_STRIP: - return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; - case PIPE_PRIM_POLYGON: - return R300_VAP_VF_CNTL__PRIM_POLYGON; - default: - return 0; - } + static const int prim_conv[] = { + R300_VAP_VF_CNTL__PRIM_POINTS, + R300_VAP_VF_CNTL__PRIM_LINES, + R300_VAP_VF_CNTL__PRIM_LINE_LOOP, + R300_VAP_VF_CNTL__PRIM_LINE_STRIP, + R300_VAP_VF_CNTL__PRIM_TRIANGLES, + R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP, + R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN, + R300_VAP_VF_CNTL__PRIM_QUADS, + R300_VAP_VF_CNTL__PRIM_QUAD_STRIP, + R300_VAP_VF_CNTL__PRIM_POLYGON, + -1, + -1, + -1, + -1 + }; + unsigned hwprim = prim_conv[prim]; + + assert(hwprim != -1); + return hwprim; } static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, @@ -179,8 +175,8 @@ static void r300_split_index_bias(struct r300_context *r300, int index_bias, enum r300_prepare_flags { PREP_EMIT_STATES = (1 << 0), /* call emit_dirty_state and friends? */ PREP_VALIDATE_VBOS = (1 << 1), /* validate VBOs? */ - PREP_EMIT_AOS = (1 << 2), /* call emit_vertex_arrays? */ - PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_vertex_arrays_swtcl? */ + PREP_EMIT_VARRAYS = (1 << 2), /* call emit_vertex_arrays? */ + PREP_EMIT_VARRAYS_SWTCL = (1 << 3), /* call emit_vertex_arrays_swtcl? */ PREP_INDEXED = (1 << 4) /* is this draw_elements? */ }; @@ -197,23 +193,22 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300, unsigned cs_dwords) { boolean flushed = FALSE; - boolean first_draw = flags & PREP_EMIT_STATES; - boolean emit_vertex_arrays = flags & PREP_EMIT_AOS; - boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL; + boolean emit_states = flags & PREP_EMIT_STATES; + boolean emit_vertex_arrays = flags & PREP_EMIT_VARRAYS; + boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_VARRAYS_SWTCL; /* Add dirty state, index offset, and AOS. */ - if (first_draw) { + if (emit_states) cs_dwords += r300_get_num_dirty_dwords(r300); - if (r300->screen->caps.is_r500) - cs_dwords += 2; /* emit_index_offset */ + if (r300->screen->caps.is_r500) + cs_dwords += 2; /* emit_index_offset */ - if (emit_vertex_arrays) - cs_dwords += 55; /* emit_vertex_arrays */ + if (emit_vertex_arrays) + cs_dwords += 55; /* emit_vertex_arrays */ - if (emit_vertex_arrays_swtcl) - cs_dwords += 7; /* emit_vertex_arrays_swtcl */ - } + if (emit_vertex_arrays_swtcl) + cs_dwords += 7; /* emit_vertex_arrays_swtcl */ cs_dwords += r300_get_num_cs_end_dwords(r300); @@ -242,46 +237,48 @@ static boolean r300_emit_states(struct r300_context *r300, int buffer_offset, int index_bias, int instance_id) { - boolean first_draw = flags & PREP_EMIT_STATES; - boolean emit_vertex_arrays = flags & PREP_EMIT_AOS; - boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL; + boolean emit_states = flags & PREP_EMIT_STATES; + boolean emit_vertex_arrays = flags & PREP_EMIT_VARRAYS; + boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_VARRAYS_SWTCL; boolean indexed = flags & PREP_INDEXED; boolean validate_vbos = flags & PREP_VALIDATE_VBOS; /* Validate buffers and emit dirty state if needed. */ - if (first_draw) { + if (emit_states || (emit_vertex_arrays && validate_vbos)) { if (!r300_emit_buffer_validate(r300, validate_vbos, index_buffer)) { fprintf(stderr, "r300: CS space validation failed. " "(not enough memory?) Skipping rendering.\n"); return FALSE; } + } + if (emit_states) r300_emit_dirty_state(r300); - if (r300->screen->caps.is_r500) { - if (r300->screen->caps.has_tcl) - r500_emit_index_bias(r300, index_bias); - else - r500_emit_index_bias(r300, 0); - } - if (emit_vertex_arrays && - (r300->vertex_arrays_dirty || - r300->vertex_arrays_indexed != indexed || - r300->vertex_arrays_offset != buffer_offset || - r300->vertex_arrays_instance_id != instance_id)) { - r300_emit_vertex_arrays(r300, buffer_offset, indexed, instance_id); - - r300->vertex_arrays_dirty = FALSE; - r300->vertex_arrays_indexed = indexed; - r300->vertex_arrays_offset = buffer_offset; - r300->vertex_arrays_instance_id = instance_id; - } + if (r300->screen->caps.is_r500) { + if (r300->screen->caps.has_tcl) + r500_emit_index_bias(r300, index_bias); + else + r500_emit_index_bias(r300, 0); + } - if (emit_vertex_arrays_swtcl) - r300_emit_vertex_arrays_swtcl(r300, indexed); + if (emit_vertex_arrays && + (r300->vertex_arrays_dirty || + r300->vertex_arrays_indexed != indexed || + r300->vertex_arrays_offset != buffer_offset || + r300->vertex_arrays_instance_id != instance_id)) { + r300_emit_vertex_arrays(r300, buffer_offset, indexed, instance_id); + + r300->vertex_arrays_dirty = FALSE; + r300->vertex_arrays_indexed = indexed; + r300->vertex_arrays_offset = buffer_offset; + r300->vertex_arrays_instance_id = instance_id; } + if (emit_vertex_arrays_swtcl) + r300_emit_vertex_arrays_swtcl(r300, indexed); + return TRUE; } @@ -544,7 +541,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300, /* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, - PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | + PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS | PREP_INDEXED, NULL, 2+count_dwords, 0, info->index_bias, -1)) return; @@ -666,7 +663,7 @@ static void r300_draw_elements(struct r300_context *r300, /* 19 dwords for emit_draw_elements. Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, - PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | + PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS | PREP_INDEXED, indexBuffer, 19, buffer_offset, info->index_bias, instance_id)) goto done; @@ -677,10 +674,11 @@ static void r300_draw_elements(struct r300_context *r300, indices3); } else { do { - if (indexSize == 2 && (start & 1)) - short_count = MIN2(count, 65535); - else - short_count = MIN2(count, 65534); + /* The maximum must be divisible by 4 and 3, + * so that quad and triangle lists are split correctly. + * + * Strips, loops, and fans won't work. */ + short_count = MIN2(count, 65532); r300_emit_draw_elements(r300, indexBuffer, indexSize, info->min_index, info->max_index, @@ -692,7 +690,7 @@ static void r300_draw_elements(struct r300_context *r300, /* 15 dwords for emit_draw_elements */ if (count) { if (!r300_prepare_for_rendering(r300, - PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, + PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS | PREP_INDEXED, indexBuffer, 19, buffer_offset, info->index_bias, instance_id)) goto done; @@ -718,7 +716,7 @@ static void r300_draw_arrays(struct r300_context *r300, /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, - PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, + PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS, NULL, 9, start, 0, instance_id)) return; @@ -726,7 +724,11 @@ static void r300_draw_arrays(struct r300_context *r300, r300_emit_draw_arrays(r300, info->mode, count); } else { do { - short_count = MIN2(count, 65535); + /* The maximum must be divisible by 4 and 3, + * so that quad and triangle lists are split correctly. + * + * Strips, loops, and fans won't work. */ + short_count = MIN2(count, 65532); r300_emit_draw_arrays(r300, info->mode, short_count); start += short_count; @@ -735,7 +737,7 @@ static void r300_draw_arrays(struct r300_context *r300, /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ if (count) { if (!r300_prepare_for_rendering(r300, - PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, + PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS, NULL, 9, start, 0, instance_id)) return; } @@ -766,7 +768,6 @@ static void r300_draw_vbo(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); struct pipe_draw_info info = *dinfo; - boolean buffers_updated, uploader_flushed; info.indexed = info.indexed && r300->index_buffer.buffer; @@ -778,9 +779,7 @@ static void r300_draw_vbo(struct pipe_context* pipe, r300_update_derived_state(r300); /* Start the vbuf manager and update buffers if needed. */ - u_vbuf_mgr_draw_begin(r300->vbuf_mgr, &info, - &buffers_updated, &uploader_flushed); - if (buffers_updated) { + if (u_vbuf_mgr_draw_begin(r300->vbuf_mgr, &info) & U_VBUF_BUFFERS_UPDATED) { r300->vertex_arrays_dirty = TRUE; } @@ -842,7 +841,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, r300_update_derived_state(r300); r300_reserve_cs_dwords(r300, - PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | + PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL | (indexed ? PREP_INDEXED : 0), indexed ? 256 : 6); @@ -1024,12 +1023,12 @@ static void r300_render_draw_arrays(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, - PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL, + PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL, NULL, dwords, 0, 0, -1)) return; } else { if (!r300_emit_states(r300, - PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL, + PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL, NULL, 0, 0, -1)) return; } @@ -1064,12 +1063,12 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, - PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, + PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED, NULL, 256, 0, 0, -1)) return; } else { if (!r300_emit_states(r300, - PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, + PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED, NULL, 0, 0, -1)) return; } @@ -1106,7 +1105,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (count) { if (!r300_prepare_for_rendering(r300, - PREP_EMIT_AOS_SWTCL | PREP_INDEXED, + PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED, NULL, 256, 0, 0, -1)) return; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 7127ea1ac16..057cd9faf03 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1307,7 +1307,7 @@ static void* sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter, state->mag_img_filter, state->min_mip_filter, - state->max_anisotropy > 0); + state->max_anisotropy > 1); sampler->filter0 |= r300_anisotropy(state->max_anisotropy); diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 121409b2260..f63114e7eb7 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -854,6 +854,12 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } + /* The hardware doesn't like CLAMP and CLAMP_TO_BORDER + * for the 3rd coordinate if the texture isn't 3D. */ + if (tex->b.b.b.target != PIPE_TEXTURE_3D) { + texstate->filter0 &= ~R300_TX_WRAP_R_MASK; + } + if (tex->tex.is_npot) { /* NPOT textures don't support mip filter, unfortunately. * This prevents incorrect rendering. */ diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 54dae1acd98..62c03b3909b 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -261,51 +261,49 @@ static INLINE uint32_t r300_translate_wrap(int wrap) } static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip, - int is_anisotropic) + boolean is_anisotropic) { uint32_t retval = 0; - if (is_anisotropic) - retval |= R300_TX_MIN_FILTER_ANISO | R300_TX_MAG_FILTER_ANISO; - else { - switch (min) { - case PIPE_TEX_FILTER_NEAREST: - retval |= R300_TX_MIN_FILTER_NEAREST; - break; - case PIPE_TEX_FILTER_LINEAR: - retval |= R300_TX_MIN_FILTER_LINEAR; - break; - default: - fprintf(stderr, "r300: Unknown texture filter %d\n", min); - assert(0); - break; - } - switch (mag) { - case PIPE_TEX_FILTER_NEAREST: - retval |= R300_TX_MAG_FILTER_NEAREST; - break; - case PIPE_TEX_FILTER_LINEAR: - retval |= R300_TX_MAG_FILTER_LINEAR; - break; - default: - fprintf(stderr, "r300: Unknown texture filter %d\n", mag); - assert(0); - break; - } + + switch (min) { + case PIPE_TEX_FILTER_NEAREST: + retval |= R300_TX_MIN_FILTER_NEAREST; + break; + case PIPE_TEX_FILTER_LINEAR: + retval |= is_anisotropic ? R300_TX_MIN_FILTER_ANISO : + R300_TX_MIN_FILTER_LINEAR; + break; + default: + fprintf(stderr, "r300: Unknown texture filter %d\n", min); + assert(0); } + + switch (mag) { + case PIPE_TEX_FILTER_NEAREST: + retval |= R300_TX_MAG_FILTER_NEAREST; + break; + case PIPE_TEX_FILTER_LINEAR: + retval |= is_anisotropic ? R300_TX_MAG_FILTER_ANISO : + R300_TX_MAG_FILTER_LINEAR; + break; + default: + fprintf(stderr, "r300: Unknown texture filter %d\n", mag); + assert(0); + } + switch (mip) { - case PIPE_TEX_MIPFILTER_NONE: - retval |= R300_TX_MIN_FILTER_MIP_NONE; - break; - case PIPE_TEX_MIPFILTER_NEAREST: - retval |= R300_TX_MIN_FILTER_MIP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - retval |= R300_TX_MIN_FILTER_MIP_LINEAR; - break; - default: - fprintf(stderr, "r300: Unknown texture filter %d\n", mip); - assert(0); - break; + case PIPE_TEX_MIPFILTER_NONE: + retval |= R300_TX_MIN_FILTER_MIP_NONE; + break; + case PIPE_TEX_MIPFILTER_NEAREST: + retval |= R300_TX_MIN_FILTER_MIP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + retval |= R300_TX_MIN_FILTER_MIP_LINEAR; + break; + default: + fprintf(stderr, "r300: Unknown texture filter %d\n", mip); + assert(0); } return retval; diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 38ca9a24e45..62c2f1fff6c 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -447,16 +447,8 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/ case PIPE_FORMAT_B8G8R8X8_UNORM: /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/ - case PIPE_FORMAT_A8R8G8B8_UNORM: - /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/ - case PIPE_FORMAT_X8R8G8B8_UNORM: - /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/ - case PIPE_FORMAT_A8B8G8R8_UNORM: - /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/ case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_X8B8G8R8_UNORM: - /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/ case PIPE_FORMAT_R8G8B8X8_UNORM: /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ /* These formats work fine with ARGB8888 if US_OUT_FMT is set @@ -662,10 +654,6 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) R300_C2_SEL_R | R300_C3_SEL_A; /* ARGB outputs. */ - case PIPE_FORMAT_A8R8G8B8_UNORM: - /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/ - case PIPE_FORMAT_X8R8G8B8_UNORM: - /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/ case PIPE_FORMAT_A16_UNORM: case PIPE_FORMAT_A16_SNORM: case PIPE_FORMAT_A16_FLOAT: @@ -674,15 +662,6 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) R300_C0_SEL_A | R300_C1_SEL_R | R300_C2_SEL_G | R300_C3_SEL_B; - /* ABGR outputs. */ - case PIPE_FORMAT_A8B8G8R8_UNORM: - /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/ - case PIPE_FORMAT_X8B8G8R8_UNORM: - /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/ - return modifier | - R300_C0_SEL_A | R300_C1_SEL_B | - R300_C2_SEL_G | R300_C3_SEL_R; - /* RGBA outputs. */ case PIPE_FORMAT_R8G8B8X8_UNORM: /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index 0135808f10a..19f07b2bef8 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -2,11 +2,7 @@ Import('*') env = env.Clone() -try: - env.ParseConfig('pkg-config --cflags libdrm_radeon') -except OSError: - print 'warning: not building r600' - Return() +env.PkgUseModules('DRM_RADEON') env.Append(CPPPATH = [ '#/include', diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index b780dba3e33..b5590116e8f 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -292,7 +292,7 @@ static inline uint32_t r600_translate_stencilformat(enum pipe_format format) static inline uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { - /* 8-bit buffers. */ + /* 8-bit buffers. */ case PIPE_FORMAT_L4A4_UNORM: return V_028C70_SWAP_ALT; @@ -305,7 +305,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R8_SNORM: return V_028C70_SWAP_STD; - /* 16-bit buffers. */ + /* 16-bit buffers. */ case PIPE_FORMAT_B5G6R5_UNORM: return V_028C70_SWAP_STD_REV; @@ -327,9 +327,10 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_028C70_SWAP_STD; case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16_FLOAT: return V_028C70_SWAP_STD; - /* 32-bit buffers. */ + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: return V_028C70_SWAP_STD_REV; case PIPE_FORMAT_B8G8R8A8_SRGB: @@ -343,6 +344,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_X8R8G8B8_UNORM: return V_028C70_SWAP_ALT_REV; case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8X8_UNORM: return V_028C70_SWAP_STD; @@ -373,13 +375,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16G16_UNORM: return V_028C70_SWAP_STD; - /* 64-bit buffers. */ + /* 64-bit buffers. */ case PIPE_FORMAT_R32G32_FLOAT: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_FLOAT: - /* 128-bit buffers. */ + /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: case PIPE_FORMAT_R32G32B32A32_SNORM: case PIPE_FORMAT_R32G32B32A32_UNORM: @@ -394,7 +396,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) { switch (format) { - /* 8-bit buffers. */ + /* 8-bit buffers. */ case PIPE_FORMAT_L4A4_UNORM: return V_028C70_COLOR_4_4; @@ -406,7 +408,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R8_SNORM: return V_028C70_COLOR_8; - /* 16-bit buffers. */ + /* 16-bit buffers. */ case PIPE_FORMAT_B5G6R5_UNORM: return V_028C70_COLOR_5_6_5; @@ -429,7 +431,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R16_UNORM: return V_028C70_COLOR_16; - /* 32-bit buffers. */ + case PIPE_FORMAT_R16_FLOAT: + return V_028C70_COLOR_16_FLOAT; + + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -472,7 +477,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R11G11B10_FLOAT: return V_028C70_COLOR_10_11_11_FLOAT; - /* 64-bit buffers. */ + /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16_USCALED: case PIPE_FORMAT_R16G16B16A16_USCALED: case PIPE_FORMAT_R16G16B16_SSCALED: @@ -492,20 +497,21 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R32G32_SSCALED: return V_028C70_COLOR_32_32; - /* 128-bit buffers. */ + /* 96-bit buffers. */ + case PIPE_FORMAT_R32G32B32_FLOAT: + return V_028C70_COLOR_32_32_32_FLOAT; + + /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_SNORM: case PIPE_FORMAT_R32G32B32A32_UNORM: return V_028C70_COLOR_32_32_32_32; - case PIPE_FORMAT_R32G32B32_FLOAT: - return V_028C70_COLOR_32_32_32_FLOAT; case PIPE_FORMAT_R32G32B32A32_FLOAT: return V_028C70_COLOR_32_32_32_32_FLOAT; - /* YUV buffers. */ + /* YUV buffers. */ case PIPE_FORMAT_UYVY: case PIPE_FORMAT_YUYV: default: - /* R600_ERR("unsupported color format %d\n", format); */ return ~0; /* Unsupported. */ } } @@ -517,11 +523,11 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_4_4: return(ENDIAN_NONE); - /* 8-bit buffers. */ + /* 8-bit buffers. */ case V_028C70_COLOR_8: return(ENDIAN_NONE); - /* 16-bit buffers. */ + /* 16-bit buffers. */ case V_028C70_COLOR_5_6_5: case V_028C70_COLOR_1_5_5_5: case V_028C70_COLOR_4_4_4_4: @@ -529,7 +535,7 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_8_8: return(ENDIAN_8IN16); - /* 32-bit buffers. */ + /* 32-bit buffers. */ case V_028C70_COLOR_8_8_8_8: case V_028C70_COLOR_2_10_10_10: case V_028C70_COLOR_8_24: @@ -539,7 +545,7 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_16_16: return(ENDIAN_8IN32); - /* 64-bit buffers. */ + /* 64-bit buffers. */ case V_028C70_COLOR_16_16_16_16: case V_028C70_COLOR_16_16_16_16_FLOAT: return(ENDIAN_8IN16); @@ -548,8 +554,9 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_32_32: return(ENDIAN_8IN32); - /* 128-bit buffers. */ + /* 96-bit buffers. */ case V_028C70_COLOR_32_32_32_FLOAT: + /* 128-bit buffers. */ case V_028C70_COLOR_32_32_32_32_FLOAT: case V_028C70_COLOR_32_32_32_32: return(ENDIAN_8IN32); diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 9ebfe54c76d..dc182611482 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -256,6 +256,8 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, } rstate = &rs->rstate; + rs->clamp_vertex_color = state->clamp_vertex_color; + rs->clamp_fragment_color = state->clamp_fragment_color; rs->flatshade = state->flatshade; rs->sprite_coord_enable = state->sprite_coord_enable; @@ -482,19 +484,27 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; int i; + int has_depth = 0; for (i = 0; i < count; i++) { if (&rctx->ps_samplers.views[i]->base != views[i]) { - if (resource[i]) + if (resource[i]) { + if (((struct r600_resource_texture *)resource[i]->base.texture)->depth) + has_depth = 1; evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i + R600_MAX_CONST_BUFFERS); - else + } else evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference( (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], views[i]); + } else { + if (resource[i]) { + if (((struct r600_resource_texture *)resource[i]->base.texture)->depth) + has_depth = 1; + } } } for (i = count; i < NUM_TEX_UNITS; i++) { @@ -504,6 +514,7 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } + rctx->have_depth_texture = has_depth; rctx->ps_samplers.n_views = count; } @@ -689,6 +700,9 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + if (rtex->depth) + rctx->have_depth_fb = TRUE; + if (rtex->depth && !rtex->is_flushing_texture) { r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE); rtex = rtex->flushed_depth_texture; @@ -870,6 +884,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, util_copy_framebuffer_state(&rctx->framebuffer, state); /* build states */ + rctx->have_depth_fb = 0; + rctx->nr_cbufs = state->nr_cbufs; for (int i = 0; i < state->nr_cbufs; i++) { evergreen_cb(rctx, rstate, state, i); } @@ -1616,7 +1632,10 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader rshader->output[i].name == TGSI_SEMANTIC_STENCIL) exports_ps |= 1; else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - num_cout++; + if (rshader->fs_write_all) + num_cout = rshader->nr_cbufs; + else + num_cout++; } } exports_ps |= S_02884C_EXPORT_COLORS(num_cout); diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index bf7138d9e4e..151e831e5c6 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -245,6 +245,7 @@ struct r600_context { unsigned pm4_cdwords; unsigned pm4_dirty_cdwords; unsigned ctx_pm4_ndwords; + unsigned init_dwords; unsigned nreloc; unsigned creloc; struct r600_reloc *reloc; @@ -261,6 +262,7 @@ struct r600_context { struct r600_range vs_resources; struct r600_range fs_resources; int num_ps_resources, num_vs_resources, num_fs_resources; + boolean have_depth_texture, have_depth_fb; }; struct r600_draw { diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 3196d97dbbb..065f955ebcb 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1383,6 +1383,9 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) break; } } + /* slight hack to make gradients always go into same cf */ + if (ntex->inst == SQ_TEX_INST_SET_GRADIENTS_H) + bc->force_add_cf = 1; } /* cf can contains only alu or only vtx or only tex */ @@ -1860,6 +1863,8 @@ void r600_bc_dump(struct r600_bc *bc) break; case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); fprintf(stderr, "GPR:%X ", cf->output.gpr); fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size); @@ -2258,7 +2263,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru ve->fs_size = bc.ndw*4; /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ - ve->fetch_shader = r600_bo(rctx->radeon, ve->fs_size, 256, PIPE_BIND_VERTEX_BUFFER, 0); + ve->fetch_shader = r600_bo(rctx->radeon, ve->fs_size, 256, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE); if (ve->fetch_shader == NULL) { r600_bc_clear(&bc); return -ENOMEM; diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 151f48a8bf8..6171d285bb9 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -27,9 +27,18 @@ enum r600_blitter_op /* bitmask */ { - R600_CLEAR = 1, - R600_CLEAR_SURFACE = 2, - R600_COPY = 4 + R600_SAVE_TEXTURES = 1, + R600_SAVE_FRAMEBUFFER = 2, + R600_DISABLE_RENDER_COND = 4, + + R600_CLEAR = 0, + + R600_CLEAR_SURFACE = R600_SAVE_FRAMEBUFFER, + + R600_COPY = R600_SAVE_FRAMEBUFFER | R600_SAVE_TEXTURES | + R600_DISABLE_RENDER_COND, + + R600_DECOMPRESS = R600_SAVE_FRAMEBUFFER | R600_DISABLE_RENDER_COND, }; static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op) @@ -58,10 +67,10 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op rctx->vbuf_mgr->nr_vertex_buffers, rctx->vbuf_mgr->vertex_buffer); - if (op & (R600_CLEAR_SURFACE | R600_COPY)) + if (op & R600_SAVE_FRAMEBUFFER) util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer); - if (op & R600_COPY) { + if (op & R600_SAVE_TEXTURES) { util_blitter_save_fragment_sampler_states( rctx->blitter, rctx->ps_samplers.n_samplers, (void**)rctx->ps_samplers.samplers); @@ -71,11 +80,23 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op (struct pipe_sampler_view**)rctx->ps_samplers.views); } + if ((op & R600_DISABLE_RENDER_COND) && rctx->current_render_cond) { + rctx->saved_render_cond = rctx->current_render_cond; + rctx->saved_render_cond_mode = rctx->current_render_cond_mode; + rctx->context.render_condition(&rctx->context, NULL, 0); + } + } static void r600_blitter_end(struct pipe_context *ctx) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + if (rctx->saved_render_cond) { + rctx->context.render_condition(&rctx->context, + rctx->saved_render_cond, + rctx->saved_render_cond_mode); + rctx->saved_render_cond = NULL; + } r600_context_queries_resume(&rctx->ctx); rctx->blit = false; } @@ -107,7 +128,7 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) depth = 0.0f; - r600_blitter_begin(ctx, R600_CLEAR_SURFACE); + r600_blitter_begin(ctx, R600_DECOMPRESS); util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth); r600_blitter_end(ctx); @@ -121,8 +142,6 @@ void r600_flush_depth_textures(struct r600_pipe_context *rctx) { unsigned int i; - if (rctx->blit) return; - /* FIXME: This handles fragment shader textures only. */ for (i = 0; i < rctx->ps_samplers.n_views; ++i) { @@ -275,6 +294,8 @@ static void r600_resource_copy_region(struct pipe_context *ctx, { struct r600_resource_texture *rsrc = (struct r600_resource_texture*)src; struct texture_orig_info orig_info[2]; + struct pipe_box sbox; + const struct pipe_box *psbox; boolean restore_orig[2]; /* Fallback for buffers. */ @@ -292,7 +313,15 @@ static void r600_resource_copy_region(struct pipe_context *ctx, if (util_format_is_compressed(src->format)) { r600_compressed_to_blittable(src, src_level, &orig_info[0]); restore_orig[0] = TRUE; - } + sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x); + sbox.y = util_format_get_nblocksy(orig_info[0].format, src_box->y); + sbox.z = src_box->z; + sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width); + sbox.height = util_format_get_nblocksy(orig_info[0].format, src_box->height); + sbox.depth = src_box->depth; + psbox=&sbox; + } else + psbox=src_box; if (util_format_is_compressed(dst->format)) { r600_compressed_to_blittable(dst, dst_level, &orig_info[1]); @@ -303,7 +332,7 @@ static void r600_resource_copy_region(struct pipe_context *ctx, } r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); + src, src_level, psbox); if (restore_orig[0]) r600_reset_blittable_to_compressed(src, src_level, &orig_info[0]); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 70e3619de4b..049a4daae66 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -127,9 +127,6 @@ static void r600_flush(struct pipe_context *ctx, if (rfence) *rfence = r600_create_fence(rctx); - if (!rctx->ctx.pm4_cdwords) - return; - #if 0 sprintf(dname, "gallium-%08d.bof", dc); if (dc < 20) { @@ -139,11 +136,6 @@ static void r600_flush(struct pipe_context *ctx, dc++; #endif r600_context_flush(&rctx->ctx); - - /* XXX This shouldn't be really necessary, but removing it breaks some tests. - * Needless buffer reallocations may significantly increase memory consumption, - * so getting rid of this call is important. */ - u_upload_flush(rctx->vbuf_mgr->uploader); } static void r600_update_num_contexts(struct r600_screen *rscreen, int diff) @@ -373,7 +365,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_BLEND_EQUATION_SEPARATE: - case PIPE_CAP_SM3: case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_DEPTH_CLAMP: @@ -382,6 +373,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_SM3: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: return 1; /* Supported except the original R600. */ @@ -391,14 +385,12 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return family == CHIP_R600 ? 0 : 1; /* Supported on Evergreen. */ - case PIPE_CAP_SEAMLESS_CUBE_MAP: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return family >= CHIP_CEDAR ? 1 : 0; /* Unsupported features. */ case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: - case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: @@ -487,9 +479,9 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e return 8; /* FIXME */ case PIPE_SHADER_CAP_MAX_INPUTS: if(shader == PIPE_SHADER_FRAGMENT) - return 10; + return 34; else - return 16; + return 32; case PIPE_SHADER_CAP_MAX_TEMPS: return 256; /* Max native temporaries. */ case PIPE_SHADER_CAP_MAX_ADDRS: diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index d92b74ebc4e..2667c80bcef 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -50,6 +50,7 @@ enum r600_pipe_state_id { R600_PIPE_STATE_BLEND = 0, R600_PIPE_STATE_BLEND_COLOR, R600_PIPE_STATE_CONFIG, + R600_PIPE_STATE_SEAMLESS_CUBEMAP, R600_PIPE_STATE_CLIP, R600_PIPE_STATE_SCISSOR, R600_PIPE_STATE_VIEWPORT, @@ -87,6 +88,8 @@ struct r600_pipe_sampler_view { struct r600_pipe_rasterizer { struct r600_pipe_state rstate; + boolean clamp_vertex_color; + boolean clamp_fragment_color; boolean flatshade; unsigned sprite_coord_enable; float offset_units; @@ -124,6 +127,12 @@ struct r600_pipe_shader { struct r600_bo *bo; struct r600_bo *bo_fetch; struct r600_vertex_element vertex_elements; + struct tgsi_token *tokens; +}; + +struct r600_pipe_sampler_state { + struct r600_pipe_state rstate; + boolean seamless_cube_map; }; /* needed for blitter save */ @@ -191,12 +200,20 @@ struct r600_pipe_context { struct r600_pipe_rasterizer *rasterizer; struct r600_pipe_state vgt; struct r600_pipe_state spi; + struct pipe_query *current_render_cond; + unsigned current_render_cond_mode; + struct pipe_query *saved_render_cond; + unsigned saved_render_cond_mode; /* shader information */ + boolean clamp_vertex_color; + boolean clamp_fragment_color; + boolean spi_dirty; unsigned sprite_coord_enable; boolean flatshade; boolean export_16bpc; unsigned alpha_ref; boolean alpha_ref_dirty; + unsigned nr_cbufs; struct r600_textures_info ps_samplers; struct r600_pipe_fences fences; @@ -204,7 +221,9 @@ struct r600_pipe_context { struct u_vbuf_mgr *vbuf_mgr; struct util_slab_mempool pool_transfers; boolean blit; + boolean have_depth_texture, have_depth_fb; + unsigned default_ps_gprs, default_vs_gprs; }; struct r600_drawl { @@ -252,7 +271,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx); void r600_init_context_resource_functions(struct r600_pipe_context *r600); /* r600_shader.c */ -int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens); +int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader); void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_find_vs_semantic_index(struct r600_shader *vs, struct r600_shader *ps, int id); @@ -270,6 +289,7 @@ void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx, void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride); +void r600_adjust_gprs(struct r600_pipe_context *rctx); /* r600_texture.c */ void r600_init_screen_texture_functions(struct pipe_screen *screen); diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 181ea3f9e49..bedb48b6031 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -75,6 +75,9 @@ static void r600_render_condition(struct pipe_context *ctx, struct r600_query *rquery = (struct r600_query *)query; int wait_flag = 0; + rctx->current_render_cond = query; + rctx->current_render_cond_mode = mode; + if (!query) { rctx->ctx.predicate_drawing = false; r600_query_predication(&rctx->ctx, NULL, PREDICATION_OP_CLEAR, 1); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 39e6d85d7b4..f83d7079b29 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -63,10 +63,6 @@ int r600_find_vs_semantic_index(struct r600_shader *vs, { struct r600_shader_io *input = &ps->input[id]; - /* position/face doesn't get/need a semantic index */ - if (input->name == TGSI_SEMANTIC_POSITION || input->name == TGSI_SEMANTIC_FACE) - return 0; - for (int i = 0; i < vs->noutput; i++) { if (input->name == vs->output[i].name && input->sid == vs->output[i].sid) { @@ -85,7 +81,8 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s /* copy new shader */ if (shader->bo == NULL) { - shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0); + /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ + shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE); if (shader->bo == NULL) { return -ENOMEM; } @@ -121,9 +118,9 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s return 0; } -static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); +static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader); -int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) +int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader) { static int dump_shaders = -1; struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; @@ -136,10 +133,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s if (dump_shaders) { fprintf(stderr, "--------------------------------------------------------------\n"); - tgsi_dump(tokens, 0); + tgsi_dump(shader->tokens, 0); } shader->shader.family = r600_get_family(rctx->radeon); - r = r600_shader_from_tgsi(tokens, &shader->shader); + r = r600_shader_from_tgsi(rctx, shader); if (r) { R600_ERR("translation from TGSI failed !\n"); return r; @@ -162,6 +159,8 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader r600_bo_reference(rctx->radeon, &shader->bo, NULL); r600_bc_clear(&shader->shader.bc); + + memset(&shader->shader,0,sizeof(struct r600_shader)); } /* @@ -189,7 +188,7 @@ struct r600_shader_ctx { struct r600_shader_tgsi_instruction *inst_info; struct r600_bc *bc; struct r600_shader *shader; - struct r600_shader_src src[3]; + struct r600_shader_src src[4]; u32 *literals; u32 nliterals; u32 max_driver_temp_used; @@ -597,15 +596,17 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) return 0; } -static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) +static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader) { + struct r600_shader *shader = &pipeshader->shader; + struct tgsi_token *tokens = pipeshader->tokens; struct tgsi_full_immediate *immediate; struct tgsi_full_property *property; struct r600_shader_ctx ctx; struct r600_bc_output output[32]; unsigned output_done, noutput; unsigned opcode; - int i, r = 0, pos0; + int i, j, r = 0, pos0; ctx.bc = &shader->bc; ctx.shader = shader; @@ -619,6 +620,11 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh shader->processor_type = ctx.type; ctx.bc->type = shader->processor_type; + shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) || + ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color)); + + shader->nr_cbufs = rctx->nr_cbufs; + /* register allocations */ /* Values [0,127] correspond to GPR[0..127]. * Values [128,159] correspond to constant buffer bank 0 @@ -728,52 +734,103 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh goto out_err; } } - /* export output */ + noutput = shader->noutput; + + /* clamp color outputs */ + if (shader->clamp_color) { + for (i = 0; i < noutput; i++) { + if (shader->output[i].name == TGSI_SEMANTIC_COLOR || + shader->output[i].name == TGSI_SEMANTIC_BCOLOR) { + + int j; + for (j = 0; j < 4; j++) { + struct r600_bc_alu alu; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + /* MOV_SAT R, R */ + alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.dst.sel = shader->output[i].gpr; + alu.dst.chan = j; + alu.dst.write = 1; + alu.dst.clamp = 1; + alu.src[0].sel = alu.dst.sel; + alu.src[0].chan = j; + + if (j == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx.bc, &alu); + if (r) + return r; + } + } + } + } + + /* export output */ + j = 0; for (i = 0, pos0 = 0; i < noutput; i++) { memset(&output[i], 0, sizeof(struct r600_bc_output)); - output[i].gpr = shader->output[i].gpr; - output[i].elem_size = 3; - output[i].swizzle_x = 0; - output[i].swizzle_y = 1; - output[i].swizzle_z = 2; - output[i].swizzle_w = 3; - output[i].burst_count = 1; - output[i].barrier = 1; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; - output[i].array_base = i - pos0; - output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); + output[i + j].gpr = shader->output[i].gpr; + output[i + j].elem_size = 3; + output[i + j].swizzle_x = 0; + output[i + j].swizzle_y = 1; + output[i + j].swizzle_z = 2; + output[i + j].swizzle_w = 3; + output[i + j].burst_count = 1; + output[i + j].barrier = 1; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output[i + j].array_base = i - pos0; + output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); switch (ctx.type) { case TGSI_PROCESSOR_VERTEX: if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - output[i].array_base = 60; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + output[i + j].array_base = 60; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; /* position doesn't count in array_base */ pos0++; } if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { - output[i].array_base = 61; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + output[i + j].array_base = 61; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; /* position doesn't count in array_base */ pos0++; } break; case TGSI_PROCESSOR_FRAGMENT: if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { - output[i].array_base = shader->output[i].sid; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[i + j].array_base = shader->output[i].sid; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + if (shader->fs_write_all && (shader->family >= CHIP_CEDAR)) { + for (j = 1; j < shader->nr_cbufs; j++) { + memset(&output[i + j], 0, sizeof(struct r600_bc_output)); + output[i + j].gpr = shader->output[i].gpr; + output[i + j].elem_size = 3; + output[i + j].swizzle_x = 0; + output[i + j].swizzle_y = 1; + output[i + j].swizzle_z = 2; + output[i + j].swizzle_w = 3; + output[i + j].burst_count = 1; + output[i + j].barrier = 1; + output[i + j].array_base = shader->output[i].sid + j; + output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } + j--; + } } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - output[i].array_base = 61; - output[i].swizzle_x = 2; - output[i].swizzle_y = 7; - output[i].swizzle_z = output[i].swizzle_w = 7; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[i + j].array_base = 61; + output[i + j].swizzle_x = 2; + output[i + j].swizzle_y = 7; + output[i + j].swizzle_z = output[i + j].swizzle_w = 7; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { - output[i].array_base = 61; - output[i].swizzle_x = 7; - output[i].swizzle_y = 1; - output[i].swizzle_z = output[i].swizzle_w = 7; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[i + j].array_base = 61; + output[i + j].swizzle_x = 7; + output[i + j].swizzle_y = 1; + output[i + j].swizzle_z = output[i + j].swizzle_w = 7; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); r = -EINVAL; @@ -786,6 +843,7 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh goto out_err; } } + noutput += j; /* add fake param output for vertex shader if no param is exported */ if (ctx.type == TGSI_PROCESSOR_VERTEX) { for (i = 0, pos0 = 0; i < noutput; i++) { @@ -1306,41 +1364,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int r; - /* dst.x, <- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ - alu.src[0].chan = 0; - tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - - /* dst.y = max(src.x, 0.0) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ - alu.src[1].chan = 0; - tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - - /* dst.w, <- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = V_SQ_ALU_SRC_1; - alu.src[0].chan = 0; - tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); - alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - if (inst->Dst[0].Register.WriteMask & (1 << 2)) { int chan; @@ -1369,7 +1392,9 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); r600_bc_src(&alu.src[0], &ctx->src[0], 1); - tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 2; + alu.dst.write = 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1426,6 +1451,42 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) return r; } } + + /* dst.x, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ + alu.src[0].chan = 0; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.y = max(src.x, 0.0) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ + alu.src[1].chan = 0; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.w, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = V_SQ_ALU_SRC_1; + alu.src[0].chan = 0; + tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); + alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + return 0; } @@ -1748,6 +1809,22 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) return 0; } +static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, + unsigned index) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && + inst->Src[index].Register.File != TGSI_FILE_INPUT) || + ctx->src[index].neg || ctx->src[index].abs; +} + +static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, + unsigned index) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; +} + static int tgsi_tex(struct r600_shader_ctx *ctx) { static float one_point_five = 1.5f; @@ -1755,19 +1832,70 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) struct r600_bc_tex tex; struct r600_bc_alu alu; unsigned src_gpr; - int r, i; + int r, i, j; int opcode; /* Texture fetch instructions can only use gprs as source. * Also they cannot negate the source or take the absolute value */ - const boolean src_requires_loading = - (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && - inst->Src[0].Register.File != TGSI_FILE_INPUT) || - ctx->src[0].neg || ctx->src[0].abs; + const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0); boolean src_loaded = FALSE; + unsigned sampler_src_reg = 1; + + src_gpr = tgsi_tex_get_src_gpr(ctx, 0); + + if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { + /* TGSI moves the sampler to src reg 3 for TXD */ + sampler_src_reg = 3; + + for (i = 1; i < 3; i++) { + /* set gradients h/v */ + memset(&tex, 0, sizeof(struct r600_bc_tex)); + tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H : + SQ_TEX_INST_SET_GRADIENTS_V; + tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; + + if (tgsi_tex_src_requires_loading(ctx, i)) { + tex.src_gpr = r600_get_temp(ctx); + tex.src_sel_x = 0; + tex.src_sel_y = 1; + tex.src_sel_z = 2; + tex.src_sel_w = 3; + + for (j = 0; j < 4; j++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + r600_bc_src(&alu.src[0], &ctx->src[i], j); + alu.dst.sel = tex.src_gpr; + alu.dst.chan = j; + if (j == 3) + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } - src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; - - if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { + } else { + tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i); + tex.src_sel_x = ctx->src[i].swizzle[0]; + tex.src_sel_y = ctx->src[i].swizzle[1]; + tex.src_sel_z = ctx->src[i].swizzle[2]; + tex.src_sel_w = ctx->src[i].swizzle[3]; + tex.src_rel = ctx->src[i].rel; + } + tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */ + tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; + if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { + tex.coord_type_x = 1; + tex.coord_type_y = 1; + tex.coord_type_z = 1; + tex.coord_type_w = 1; + } + r = r600_bc_add_tex(ctx->bc, &tex); + if (r) + return r; + } + } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { int out_chan; /* Add perspective divide */ if (ctx->bc->chiprev == CHIPREV_CAYMAN) { @@ -1954,13 +2082,24 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } opcode = ctx->inst_info->r600_opcode; - if (opcode == SQ_TEX_INST_SAMPLE && - (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) - opcode = SQ_TEX_INST_SAMPLE_C; + if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) { + switch (opcode) { + case SQ_TEX_INST_SAMPLE: + opcode = SQ_TEX_INST_SAMPLE_C; + break; + case SQ_TEX_INST_SAMPLE_L: + opcode = SQ_TEX_INST_SAMPLE_C_L; + break; + case SQ_TEX_INST_SAMPLE_G: + opcode = SQ_TEX_INST_SAMPLE_C_G; + break; + } + } memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = opcode; - tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; + + tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; @@ -3085,7 +3224,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, @@ -3191,7 +3330,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq}, {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, @@ -3243,7 +3382,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, @@ -3401,7 +3540,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 8f96ce5085c..76aebf2b1ea 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -46,6 +46,8 @@ struct r600_shader { enum radeon_family family; boolean uses_kill; boolean fs_write_all; + boolean clamp_color; + unsigned nr_cbufs; }; #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index a6cfa704ca5..7c1976f12e0 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -299,6 +299,8 @@ static void *r600_create_rs_state(struct pipe_context *ctx, } rstate = &rs->rstate; + rs->clamp_vertex_color = state->clamp_vertex_color; + rs->clamp_fragment_color = state->clamp_fragment_color; rs->flatshade = state->flatshade; rs->sprite_coord_enable = state->sprite_coord_enable; @@ -374,14 +376,17 @@ static void *r600_create_rs_state(struct pipe_context *ctx, static void *r600_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) { - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + struct r600_pipe_sampler_state *ss = CALLOC_STRUCT(r600_pipe_sampler_state); + struct r600_pipe_state *rstate; union util_color uc; unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 4 : 0; - if (rstate == NULL) { + if (ss == NULL) { return NULL; } + ss->seamless_cube_map = state->seamless_cube_map; + rstate = &ss->rstate; rstate->id = R600_PIPE_STATE_SAMPLER; util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); r600_pipe_state_add_reg_noblock(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, @@ -412,7 +417,6 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c struct pipe_resource *texture, const struct pipe_sampler_view *state) { - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); struct r600_pipe_resource_state *rstate; const struct util_format_description *desc; @@ -422,7 +426,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c uint32_t word4 = 0, yuv_format = 0, pitch = 0; unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; - unsigned height, depth; + unsigned width, height, depth, offset_level, last_level; if (resource == NULL) return NULL; @@ -448,7 +452,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c } desc = util_format_description(state->format); if (desc == NULL) { - R600_ERR("unknow format %d\n", state->format); + R600_ERR("unknown format %d\n", state->format); } tmp = (struct r600_resource_texture *)texture; if (tmp->depth && !tmp->is_flushing_texture) { @@ -464,12 +468,18 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c rbuffer = &tmp->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; - pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8); - array_mode = tmp->array_mode[0]; + + offset_level = state->u.tex.first_level; + last_level = state->u.tex.last_level - offset_level; + width = u_minify(texture->width0, offset_level); + height = u_minify(texture->height0, offset_level); + depth = u_minify(texture->depth0, offset_level); + + pitch = align(tmp->pitch_in_blocks[offset_level] * + util_format_get_blockwidth(state->format), 8); + array_mode = tmp->array_mode[offset_level]; tile_type = tmp->tile_type; - height = texture->height0; - depth = texture->depth0; if (texture->target == PIPE_TEXTURE_1D_ARRAY) { height = 1; depth = texture->array_size; @@ -484,18 +494,18 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c S_038000_TILE_MODE(array_mode) | S_038000_TILE_TYPE(tile_type) | S_038000_PITCH((pitch / 8) - 1) | - S_038000_TEX_WIDTH(texture->width0 - 1)); + S_038000_TEX_WIDTH(width - 1)); rstate->val[1] = (S_038004_TEX_HEIGHT(height - 1) | S_038004_TEX_DEPTH(depth - 1) | S_038004_DATA_FORMAT(format)); - rstate->val[2] = (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8; - rstate->val[3] = (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8; + rstate->val[2] = (tmp->offset[offset_level] + r600_bo_offset(bo[0])) >> 8; + rstate->val[3] = (tmp->offset[offset_level+1] + r600_bo_offset(bo[1])) >> 8; rstate->val[4] = (word4 | S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | S_038010_REQUEST_SIZE(1) | S_038010_ENDIAN_SWAP(endian) | - S_038010_BASE_LEVEL(state->u.tex.first_level)); - rstate->val[5] = (S_038014_LAST_LEVEL(state->u.tex.last_level) | + S_038010_BASE_LEVEL(0)); + rstate->val[5] = (S_038014_LAST_LEVEL(last_level) | S_038014_BASE_ARRAY(state->u.tex.first_layer) | S_038014_LAST_ARRAY(state->u.tex.last_layer)); rstate->val[6] = (S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE) | @@ -524,13 +534,16 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; int i; + int has_depth = 0; for (i = 0; i < count; i++) { if (&rctx->ps_samplers.views[i]->base != views[i]) { - if (resource[i]) + if (resource[i]) { + if (((struct r600_resource_texture *)resource[i]->base.texture)->depth) + has_depth = 1; r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i + R600_MAX_CONST_BUFFERS); - else + } else r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i + R600_MAX_CONST_BUFFERS); @@ -538,6 +551,11 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], views[i]); + } else { + if (resource[i]) { + if (((struct r600_resource_texture *)resource[i]->base.texture)->depth) + has_depth = 1; + } } } for (i = count; i < NUM_TEX_UNITS; i++) { @@ -547,30 +565,61 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } + rctx->have_depth_texture = has_depth; rctx->ps_samplers.n_views = count; } +static void r600_set_seamless_cubemap(struct r600_pipe_context *rctx, boolean enable) +{ + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + if (rstate == NULL) + return; + + rstate->id = R600_PIPE_STATE_SEAMLESS_CUBEMAP; + r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, + (enable ? 0 : S_009508_DISABLE_CUBE_WRAP(1)), + 1, NULL); + + free(rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP]); + rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + struct r600_pipe_sampler_state **sstates = (struct r600_pipe_sampler_state **)states; + int seamless = -1; memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count); rctx->ps_samplers.n_samplers = count; for (int i = 0; i < count; i++) { - r600_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); + r600_context_pipe_state_set_ps_sampler(&rctx->ctx, &sstates[i]->rstate, i); + + if (sstates[i]) + seamless = sstates[i]->seamless_cube_map; } + + if (seamless != -1) + r600_set_seamless_cubemap(rctx, seamless); } static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void **states) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + struct r600_pipe_sampler_state **sstates = (struct r600_pipe_sampler_state **)states; + int seamless = -1; for (int i = 0; i < count; i++) { - r600_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); + r600_context_pipe_state_set_vs_sampler(&rctx->ctx, &sstates[i]->rstate, i); + + if (sstates[i]) + seamless = sstates[i]->seamless_cube_map; } + + if (seamless != -1) + r600_set_seamless_cubemap(rctx, seamless); } static void r600_set_clip_state(struct pipe_context *ctx, @@ -730,6 +779,9 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + if (rtex->depth) + rctx->have_depth_fb = TRUE; + if (rtex->depth && !rtex->is_flushing_texture) { r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE); rtex = rtex->flushed_depth_texture; @@ -892,6 +944,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, util_copy_framebuffer_state(&rctx->framebuffer, state); /* build states */ + rctx->have_depth_fb = 0; for (int i = 0; i < state->nr_cbufs; i++) { r600_cb(rctx, rstate, state, i); } @@ -1031,6 +1084,46 @@ void r600_init_state_functions(struct r600_pipe_context *rctx) rctx->context.texture_barrier = r600_texture_barrier; } +void r600_adjust_gprs(struct r600_pipe_context *rctx) +{ + enum radeon_family family; + struct r600_pipe_state rstate; + unsigned num_ps_gprs = rctx->default_ps_gprs; + unsigned num_vs_gprs = rctx->default_vs_gprs; + unsigned tmp; + int diff; + + family = r600_get_family(rctx->radeon); + + if (family >= CHIP_CEDAR) + return; + + if (!rctx->ps_shader && !rctx->vs_shader) + return; + + if (rctx->ps_shader->shader.bc.ngpr > rctx->default_ps_gprs) + { + diff = rctx->ps_shader->shader.bc.ngpr - rctx->default_ps_gprs; + num_vs_gprs -= diff; + num_ps_gprs += diff; + } + + if (rctx->vs_shader->shader.bc.ngpr > rctx->default_vs_gprs) + { + diff = rctx->vs_shader->shader.bc.ngpr - rctx->default_vs_gprs; + num_ps_gprs -= diff; + num_vs_gprs += diff; + } + + tmp = 0; + tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + rstate.nregs = 0; + r600_pipe_state_add_reg(&rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0x0FFFFFFF, NULL); + + r600_context_pipe_state_set(&rctx->ctx, &rstate); +} + void r600_init_config(struct r600_pipe_context *rctx) { int ps_prio; @@ -1173,6 +1266,9 @@ void r600_init_config(struct r600_pipe_context *rctx) break; } + rctx->default_ps_gprs = num_ps_gprs; + rctx->default_vs_gprs = num_vs_gprs; + rstate->id = R600_PIPE_STATE_CONFIG; /* SQ_CONFIG */ @@ -1206,7 +1302,7 @@ void r600_init_config(struct r600_pipe_context *rctx) /* SQ_GPR_RESOURCE_MGMT_2 */ tmp = 0; tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - tmp |= S_008C08_NUM_GS_GPRS(num_es_gprs); + tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); /* SQ_THREAD_RESOURCE_MGMT */ @@ -1234,14 +1330,22 @@ void r600_init_config(struct r600_pipe_context *rctx) if (family >= CHIP_RV770) { r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000002, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, + S_009508_DISABLE_CUBE_ANISO(1) | + S_009508_SYNC_GRADIENT(1) | + S_009508_SYNC_WALKER(1) | + S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL); } else { r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000003, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, + S_009508_DISABLE_CUBE_ANISO(1) | + S_009508_SYNC_GRADIENT(1) | + S_009508_SYNC_WALKER(1) | + S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index a670ac02be2..d9140403e5a 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -28,6 +28,7 @@ #include <util/u_format.h> #include <pipebuffer/pb_buffer.h> #include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" #include "r600_formats.h" #include "r600_pipe.h" #include "r600d.h" @@ -99,6 +100,8 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state) if (state == NULL) return; + rctx->clamp_vertex_color = rs->clamp_vertex_color; + rctx->clamp_fragment_color = rs->clamp_fragment_color; rctx->flatshade = rs->flatshade; rctx->sprite_coord_enable = rs->sprite_coord_enable; rctx->rasterizer = rs; @@ -112,7 +115,7 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state) r600_polygon_offset_update(rctx); } if (rctx->ps_shader && rctx->vs_shader) - r600_spi_update(rctx); + rctx->spi_dirty = true; } void r600_delete_rs_state(struct pipe_context *ctx, void *state) @@ -257,7 +260,9 @@ void *r600_create_shader_state(struct pipe_context *ctx, struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); int r; - r = r600_pipe_shader_create(ctx, shader, state->tokens); + shader->tokens = tgsi_dup_tokens(state->tokens); + + r = r600_pipe_shader_create(ctx, shader); if (r) { return NULL; } @@ -273,8 +278,10 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state) if (state) { r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_shader->rstate); } - if (rctx->ps_shader && rctx->vs_shader) - r600_spi_update(rctx); + if (rctx->ps_shader && rctx->vs_shader) { + rctx->spi_dirty = true; + r600_adjust_gprs(rctx); + } } void r600_bind_vs_shader(struct pipe_context *ctx, void *state) @@ -286,8 +293,10 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state) if (state) { r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_shader->rstate); } - if (rctx->ps_shader && rctx->vs_shader) - r600_spi_update(rctx); + if (rctx->ps_shader && rctx->vs_shader) { + rctx->spi_dirty = true; + r600_adjust_gprs(rctx); + } } void r600_delete_ps_shader(struct pipe_context *ctx, void *state) @@ -299,6 +308,7 @@ void r600_delete_ps_shader(struct pipe_context *ctx, void *state) rctx->ps_shader = NULL; } + free(shader->tokens); r600_pipe_shader_destroy(ctx, shader); free(shader); } @@ -312,6 +322,7 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state) rctx->vs_shader = NULL; } + free(shader->tokens); r600_pipe_shader_destroy(ctx, shader); free(shader); } @@ -347,14 +358,23 @@ static void r600_spi_update(struct r600_pipe_context *rctx) struct r600_pipe_shader *shader = rctx->ps_shader; struct r600_pipe_state *rstate = &rctx->spi; struct r600_shader *rshader = &shader->shader; - unsigned i, tmp; + unsigned i, tmp, sid; if (rctx->spi.id == 0) r600_spi_block_init(rctx, &rctx->spi); rstate->nregs = 0; for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + if (rshader->input[i].name == TGSI_SEMANTIC_POSITION || + rshader->input[i].name == TGSI_SEMANTIC_FACE) + if (rctx->family >= CHIP_CEDAR) + continue; + else + sid=0; + else + sid=r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i); + + tmp = S_028644_SEMANTIC(sid); if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || @@ -378,6 +398,7 @@ static void r600_spi_update(struct r600_pipe_context *rctx) r600_pipe_state_mod_reg(rstate, tmp); } + rctx->spi_dirty = false; r600_context_pipe_state_set(&rctx->ctx, rstate); } @@ -517,21 +538,39 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) } } +static int r600_shader_rebuild(struct pipe_context * ctx, struct r600_pipe_shader * shader) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + int r; + + r600_pipe_shader_destroy(ctx, shader); + r = r600_pipe_shader_create(ctx, shader); + if (r) { + return r; + } + r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); + + return 0; +} + void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource *rbuffer; - u32 vgt_dma_index_type, vgt_dma_swap_mode, vgt_draw_initiator, mask; struct r600_draw rdraw; - struct r600_drawl draw = {}; - unsigned prim; + struct r600_drawl draw; + unsigned prim, mask; - r600_flush_depth_textures(rctx); - u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info, NULL, NULL); + if (!rctx->blit) { + if (rctx->have_depth_fb || rctx->have_depth_texture) + r600_flush_depth_textures(rctx); + } + u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info); r600_vertex_buffer_update(rctx); draw.info = *info; draw.ctx = ctx; + draw.index_buffer = NULL; if (info->indexed && rctx->index_buffer.buffer) { draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); @@ -549,57 +588,29 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_upload_index_buffer(rctx, &draw); } } else { + draw.index_size = 0; + draw.index_buffer_offset = 0; draw.info.index_bias = info->start; } - vgt_dma_swap_mode = 0; - switch (draw.index_size) { - case 2: - vgt_draw_initiator = 0; - vgt_dma_index_type = 0; - if (R600_BIG_ENDIAN) { - vgt_dma_swap_mode = ENDIAN_8IN16; - } - break; - case 4: - vgt_draw_initiator = 0; - vgt_dma_index_type = 1; - if (R600_BIG_ENDIAN) { - vgt_dma_swap_mode = ENDIAN_8IN32; - } - break; - case 0: - vgt_draw_initiator = 2; - vgt_dma_index_type = 0; - break; - default: - R600_ERR("unsupported index size %d\n", draw.index_size); - return; - } if (r600_conv_pipe_prim(draw.info.mode, &prim)) return; - if (unlikely(rctx->ps_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - if (unlikely(rctx->vs_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - /* there should be enough input */ - if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); - return; - } + + if (rctx->vs_shader->shader.clamp_color != rctx->clamp_vertex_color) + r600_shader_rebuild(ctx, rctx->vs_shader); + + if ((rctx->ps_shader->shader.clamp_color != rctx->clamp_fragment_color) || + ((rctx->family >= CHIP_CEDAR) && rctx->ps_shader->shader.fs_write_all && + (rctx->ps_shader->shader.nr_cbufs != rctx->nr_cbufs))) + r600_shader_rebuild(ctx, rctx->ps_shader); + + if (rctx->spi_dirty) + r600_spi_update(rctx); if (rctx->alpha_ref_dirty) r600_update_alpha_ref(rctx); - mask = 0; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - mask |= (0xF << (i * 4)); - } + mask = (1ULL << ((unsigned)rctx->framebuffer.nr_cbufs * 4)) - 1; if (rctx->vgt.id != R600_PIPE_STATE_VGT) { rctx->vgt.id = R600_PIPE_STATE_VGT; @@ -633,8 +644,10 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) rdraw.vgt_num_indices = draw.info.count; rdraw.vgt_num_instances = draw.info.instance_count; - rdraw.vgt_index_type = vgt_dma_index_type | (vgt_dma_swap_mode << 2); - rdraw.vgt_draw_initiator = vgt_draw_initiator; + rdraw.vgt_index_type = ((draw.index_size == 4) ? 1 : 0); + if (R600_BIG_ENDIAN) + rdraw.vgt_index_type |= (draw.index_size >> 1) << 2; + rdraw.vgt_draw_initiator = draw.index_size ? 0 : 2; rdraw.indices = NULL; if (draw.index_buffer) { rbuffer = (struct r600_resource*)draw.index_buffer; diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index acd41a21214..8711dbf1720 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -284,7 +284,7 @@ static inline uint32_t r600_translate_dbformat(enum pipe_format format) static inline uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { - /* 8-bit buffers. */ + /* 8-bit buffers. */ case PIPE_FORMAT_A8_UNORM: return V_0280A0_SWAP_ALT_REV; case PIPE_FORMAT_I8_UNORM: @@ -297,7 +297,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_L4A4_UNORM: return V_0280A0_SWAP_ALT; - /* 16-bit buffers. */ + /* 16-bit buffers. */ case PIPE_FORMAT_B5G6R5_UNORM: return V_0280A0_SWAP_STD_REV; @@ -320,9 +320,10 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16_UNORM: case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16_FLOAT: return V_0280A0_SWAP_STD; - /* 32-bit buffers. */ + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: return V_0280A0_SWAP_STD_REV; @@ -368,13 +369,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R32_FLOAT: return V_0280A0_SWAP_STD; - /* 64-bit buffers. */ + /* 64-bit buffers. */ case PIPE_FORMAT_R32G32_FLOAT: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_FLOAT: - /* 128-bit buffers. */ + /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: case PIPE_FORMAT_R32G32B32A32_SNORM: case PIPE_FORMAT_R32G32B32A32_UNORM: @@ -392,7 +393,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_L4A4_UNORM: return V_0280A0_COLOR_4_4; - /* 8-bit buffers. */ + /* 8-bit buffers. */ case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: @@ -401,7 +402,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R8_SNORM: return V_0280A0_COLOR_8; - /* 16-bit buffers. */ + /* 16-bit buffers. */ case PIPE_FORMAT_B5G6R5_UNORM: return V_0280A0_COLOR_5_6_5; @@ -425,7 +426,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R16_SNORM: return V_0280A0_COLOR_16; - /* 32-bit buffers. */ + case PIPE_FORMAT_R16_FLOAT: + return V_0280A0_COLOR_16_FLOAT; + + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -468,7 +472,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R11G11B10_FLOAT: return V_0280A0_COLOR_10_11_11_FLOAT; - /* 64-bit buffers. */ + /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16_USCALED: case PIPE_FORMAT_R16G16B16A16_USCALED: case PIPE_FORMAT_R16G16B16_SSCALED: @@ -488,20 +492,21 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R32G32_SSCALED: return V_0280A0_COLOR_32_32; - /* 128-bit buffers. */ + /* 96-bit buffers. */ case PIPE_FORMAT_R32G32B32_FLOAT: return V_0280A0_COLOR_32_32_32_FLOAT; + + /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: return V_0280A0_COLOR_32_32_32_32_FLOAT; case PIPE_FORMAT_R32G32B32A32_SNORM: case PIPE_FORMAT_R32G32B32A32_UNORM: return V_0280A0_COLOR_32_32_32_32; - /* YUV buffers. */ + /* YUV buffers. */ case PIPE_FORMAT_UYVY: case PIPE_FORMAT_YUYV: default: - /* R600_ERR("unsupported color format %d %s\n", format, util_format_name(format)); */ return ~0; /* Unsupported. */ } } @@ -513,11 +518,11 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_4_4: return(ENDIAN_NONE); - /* 8-bit buffers. */ + /* 8-bit buffers. */ case V_0280A0_COLOR_8: return(ENDIAN_NONE); - /* 16-bit buffers. */ + /* 16-bit buffers. */ case V_0280A0_COLOR_5_6_5: case V_0280A0_COLOR_1_5_5_5: case V_0280A0_COLOR_4_4_4_4: @@ -525,7 +530,7 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_8_8: return(ENDIAN_8IN16); - /* 32-bit buffers. */ + /* 32-bit buffers. */ case V_0280A0_COLOR_8_8_8_8: case V_0280A0_COLOR_2_10_10_10: case V_0280A0_COLOR_8_24: @@ -535,7 +540,7 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_16_16: return(ENDIAN_8IN32); - /* 64-bit buffers. */ + /* 64-bit buffers. */ case V_0280A0_COLOR_16_16_16_16: case V_0280A0_COLOR_16_16_16_16_FLOAT: return(ENDIAN_8IN16); @@ -544,7 +549,7 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_32_32: return(ENDIAN_8IN32); - /* 128-bit buffers. */ + /* 128-bit buffers. */ case V_0280A0_COLOR_32_32_32_FLOAT: case V_0280A0_COLOR_32_32_32_32_FLOAT: case V_0280A0_COLOR_32_32_32_32: @@ -565,7 +570,7 @@ static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *scree static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) { return r600_translate_colorformat(format) != ~0 && - r600_translate_colorswap(format) != ~0; + r600_translate_colorswap(format) != ~0; } static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 77cdd8dc33d..854761d17cb 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -243,10 +243,11 @@ static void r600_setup_miptree(struct pipe_screen *screen, struct radeon *radeon = (struct radeon *)screen->winsys; enum chip_class chipc = r600_get_family_class(radeon); unsigned size, layer_size, i, offset; - unsigned nblocksx, nblocksy; + unsigned nblocksx, nblocksy, extra_size = 0; for (i = 0, offset = 0; i <= ptex->last_level; i++) { unsigned blocksize = util_format_get_blocksize(ptex->format); + unsigned base_align = r600_get_base_alignment(screen, ptex->format, array_mode); r600_texture_set_array_mode(screen, rtex, i, array_mode); @@ -265,9 +266,13 @@ static void r600_setup_miptree(struct pipe_screen *screen, else size = layer_size * ptex->array_size; + /* evergreen stores depth and stencil separately */ + if ((chipc >= EVERGREEN) && util_format_is_depth_or_stencil(ptex->format)) + extra_size = align(extra_size + (nblocksx * nblocksy * 1), base_align); + /* align base image and start of miptree */ if ((i == 0) || (i == 1)) - offset = align(offset, r600_get_base_alignment(screen, ptex->format, array_mode)); + offset = align(offset, base_align); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; rtex->pitch_in_blocks[i] = nblocksx; /* CB talks in elements */ @@ -275,7 +280,7 @@ static void r600_setup_miptree(struct pipe_screen *screen, offset += size; } - rtex->size = offset; + rtex->size = offset + extra_size; } /* Figure out whether u_blitter will fallback to a transfer operation. @@ -1091,8 +1096,9 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, goto out_word4; } } - + goto out_unknown; } + out_word4: if (word4_p) *word4_p = word4; diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c index 7482d15e12f..307fd57e21a 100644 --- a/src/gallium/drivers/r600/r600_translate.c +++ b/src/gallium/drivers/r600/r600_translate.c @@ -48,6 +48,7 @@ void r600_translate_index_buffer(struct r600_pipe_context *r600, &r600->context, *index_buffer, 0, *start, count, ptr); pipe_resource_reference(index_buffer, out_buffer); + pipe_resource_reference(&out_buffer, NULL); *index_size = 2; *start = out_offset / 2; break; diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 9281b08bd82..f6eec24cc05 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -2556,6 +2556,9 @@ #define S_009508_DISABLE_CUBE_WRAP(x) (((x) & 0x1) << 0) #define G_009508_DISABLE_CUBE_WRAP(x) (((x) >> 0) & 0x1) #define C_009508_DISABLE_CUBE_WRAP 0xFFFFFFFE +#define S_009508_DISABLE_CUBE_ANISO(x) (((x) & 0x1) << 1) +#define G_009508_DISABLE_CUBE_ANISO(x) (((x) >> 1) & 0x1) +#define C_009508_DISABLE_CUBE_ANISO (~(1 << 1)) #define S_009508_SYNC_GRADIENT(x) (((x) & 0x1) << 24) #define G_009508_SYNC_GRADIENT(x) (((x) >> 24) & 0x1) #define C_009508_SYNC_GRADIENT 0xFEFFFFFF @@ -3465,9 +3468,14 @@ #define SQ_TEX_INST_LD 0x03 #define SQ_TEX_INST_GET_GRADIENTS_H 0x7 #define SQ_TEX_INST_GET_GRADIENTS_V 0x8 +#define SQ_TEX_INST_SET_GRADIENTS_H 0xB +#define SQ_TEX_INST_SET_GRADIENTS_V 0xC #define SQ_TEX_INST_SAMPLE 0x10 #define SQ_TEX_INST_SAMPLE_L 0x11 +#define SQ_TEX_INST_SAMPLE_G 0x14 #define SQ_TEX_INST_SAMPLE_C 0x18 +#define SQ_TEX_INST_SAMPLE_C_L 0x19 +#define SQ_TEX_INST_SAMPLE_C_G 0x1C #endif diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index dbbc249258d..cfb1b9d8d0d 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -207,6 +207,14 @@ void svga_context_flush( struct svga_context *svga, svga->curr.nr_fbs = 0; + /* Flush the upload managers to ensure recycling of upload buffers + * without throttling. This should really be conditioned on + * pipe_buffer_map_range not supporting PIPE_TRANSFER_UNSYNCHRONIZED. + */ + + u_upload_flush(svga->upload_vb); + u_upload_flush(svga->upload_ib); + /* Ensure that texture dma uploads are processed * before submitting commands. */ diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index eca529d262e..34b9e85c1a3 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -372,9 +372,6 @@ struct svga_context /** List of buffers with queued transfers */ struct list_head dirty_buffers; - - /** Was the previous draw done with the SW path? */ - boolean prev_draw_swtnl; }; /* A flag for each state_tracker state object: diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index d8af615ede1..aa096692888 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -145,7 +145,7 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) unsigned i; /* Unmap upload manager vertex buffers */ - u_upload_flush(svga->upload_vb); + u_upload_unmap(svga->upload_vb); for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]); @@ -156,7 +156,7 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) } /* Unmap upload manager index buffers */ - u_upload_flush(svga->upload_ib); + u_upload_unmap(svga->upload_ib); for (i = 0; i < hwtnl->cmd.prim_count; i++) { if (hwtnl->cmd.prim_ib[i]) { @@ -242,6 +242,11 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) } +void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl, + int index_bias) +{ + hwtnl->index_bias = index_bias; +} @@ -265,15 +270,16 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl, unsigned size = vb ? vb->width0 : 0; unsigned offset = hwtnl->cmd.vdecl[i].array.offset; unsigned stride = hwtnl->cmd.vdecl[i].array.stride; - unsigned index_bias = range->indexBias; + int index_bias = (int) range->indexBias + hwtnl->index_bias; unsigned width; assert(vb); assert(size); assert(offset < size); - assert(index_bias >= 0); assert(min_index <= max_index); - assert(offset + index_bias*stride < size); + if (index_bias >= 0) { + assert(offset + index_bias*stride < size); + } if (min_index != ~0) { assert(offset + (index_bias + min_index) * stride < size); } @@ -394,6 +400,7 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl, hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index; hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range; + hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias; pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib); hwtnl->cmd.prim_count++; diff --git a/src/gallium/drivers/svga/svga_draw.h b/src/gallium/drivers/svga/svga_draw.h index a2403d802be..1dac17421e1 100644 --- a/src/gallium/drivers/svga/svga_draw.h +++ b/src/gallium/drivers/svga/svga_draw.h @@ -79,5 +79,8 @@ svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl, enum pipe_error svga_hwtnl_flush( struct svga_hwtnl *hwtnl ); +void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl, + int index_bias); + #endif /* SVGA_DRAW_H_ */ diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h index ca658ac6745..8126f7ee23c 100644 --- a/src/gallium/drivers/svga/svga_draw_private.h +++ b/src/gallium/drivers/svga/svga_draw_private.h @@ -116,6 +116,13 @@ struct draw_cmd { struct svga_hwtnl { struct svga_context *svga; struct u_upload_mgr *upload_ib; + + /* Additional negative index bias due to partial buffer uploads + * This is compensated for in the offset associated with all + * vertex buffers. + */ + + int index_bias; /* Flatshade information: */ diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 2093bcae101..d53edcb23c5 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -25,6 +25,7 @@ #include "svga_cmd.h" +#include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_prim.h" #include "util/u_time.h" @@ -37,6 +38,178 @@ #include "svga_state.h" #include "svga_swtnl.h" #include "svga_debug.h" +#include "svga_resource_buffer.h" +#include "util/u_upload_mgr.h" + +/** + * Determine the ranges to upload for the user-buffers referenced + * by the next draw command. + * + * TODO: It might be beneficial to support multiple ranges. In that case, + * the struct svga_buffer::uploaded member should be made an array or a + * list, since we need to account for the possibility that different ranges + * may be uploaded to different hardware buffers chosen by the utility + * upload manager. + */ + +static void +svga_user_buffer_range(struct svga_context *svga, + unsigned start, + unsigned count, + unsigned instance_count) +{ + const struct pipe_vertex_element *ve = svga->curr.velems->velem; + int i; + + /* + * Release old uploaded range (if not done already) and + * initialize new ranges. + */ + + for (i=0; i < svga->curr.velems->count; i++) { + struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; + + if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) { + struct svga_buffer *buffer = svga_buffer(vb->buffer); + + pipe_resource_reference(&buffer->uploaded.buffer, NULL); + buffer->uploaded.start = ~0; + buffer->uploaded.end = 0; + } + } + + for (i=0; i < svga->curr.velems->count; i++) { + struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; + + if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) { + struct svga_buffer *buffer = svga_buffer(vb->buffer); + unsigned first, size; + unsigned instance_div = ve[i].instance_divisor; + unsigned elemSize = util_format_get_blocksize(ve[i].src_format); + + svga->dirty |= SVGA_NEW_VBUFFER; + + if (instance_div) { + first = ve[i].src_offset; + count = (instance_count + instance_div - 1) / instance_div; + size = vb->stride * (count - 1) + elemSize; + } else if (vb->stride) { + first = vb->stride * start + ve[i].src_offset; + size = vb->stride * (count - 1) + elemSize; + } else { + /* Only a single vertex! + * Upload with the largest vertex size the hw supports, + * if possible. + */ + first = ve[i].src_offset; + size = MIN2(16, vb->buffer->width0); + } + + buffer->uploaded.start = MIN2(buffer->uploaded.start, first); + buffer->uploaded.end = MAX2(buffer->uploaded.end, first + size); + } + } +} + +/** + * svga_upload_user_buffers - upload parts of user buffers + * + * This function streams a part of a user buffer to hw and fills + * svga_buffer::uploaded with information on the upload. + */ + +static int +svga_upload_user_buffers(struct svga_context *svga, + unsigned start, + unsigned count, + unsigned instance_count) +{ + const struct pipe_vertex_element *ve = svga->curr.velems->velem; + unsigned i; + int ret; + + svga_user_buffer_range(svga, start, count, instance_count); + + for (i=0; i < svga->curr.velems->count; i++) { + struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; + + if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) { + struct svga_buffer *buffer = svga_buffer(vb->buffer); + boolean flushed; + + /* + * Check if already uploaded. Otherwise go ahead and upload. + */ + + if (buffer->uploaded.buffer) + continue; + + ret = u_upload_buffer( svga->upload_vb, + 0, + buffer->uploaded.start, + buffer->uploaded.end - buffer->uploaded.start, + &buffer->b.b, + &buffer->uploaded.offset, + &buffer->uploaded.buffer, + &flushed); + + if (ret) + return ret; + + if (0) + debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sofs %d" + " sz %d\n", + __FUNCTION__, + i, + buffer, + buffer->uploaded.buffer, + buffer->uploaded.offset, + buffer->uploaded.start, + buffer->uploaded.end - buffer->uploaded.start); + + vb->buffer_offset = buffer->uploaded.offset; + } + } + + return PIPE_OK; +} + +/** + * svga_release_user_upl_buffers - release uploaded parts of user buffers + * + * This function releases the hw copy of the uploaded fraction of the + * user-buffer. It's important to do this as soon as all draw calls + * affecting the uploaded fraction are issued, as this allows for + * efficient reuse of the hardware surface backing the uploaded fraction. + * + * svga_buffer::source_offset is set to 0, and svga_buffer::uploaded::buffer + * is set to 0. + */ + +static void +svga_release_user_upl_buffers(struct svga_context *svga) +{ + unsigned i; + unsigned nr; + + nr = svga->curr.num_vertex_buffers; + + for (i = 0; i < nr; ++i) { + struct pipe_vertex_buffer *vb = &svga->curr.vb[i]; + + if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) { + struct svga_buffer *buffer = svga_buffer(vb->buffer); + + buffer->uploaded.start = ~0; + buffer->uploaded.end = 0; + if (buffer->uploaded.buffer) + pipe_resource_reference(&buffer->uploaded.buffer, NULL); + } + } +} @@ -50,6 +223,7 @@ retry_draw_range_elements( struct svga_context *svga, unsigned prim, unsigned start, unsigned count, + unsigned instance_count, boolean do_retry ) { enum pipe_error ret = 0; @@ -61,6 +235,10 @@ retry_draw_range_elements( struct svga_context *svga, svga->curr.rast->templ.flatshade, svga->curr.rast->templ.flatshade_first ); + ret = svga_upload_user_buffers( svga, min_index + index_bias, + max_index - min_index + 1, instance_count ); + if (ret != PIPE_OK) + goto retry; ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); if (ret) @@ -84,7 +262,7 @@ retry: index_buffer, index_size, index_bias, min_index, max_index, prim, start, count, - FALSE ); + instance_count, FALSE ); } return ret; @@ -96,6 +274,7 @@ retry_draw_arrays( struct svga_context *svga, unsigned prim, unsigned start, unsigned count, + unsigned instance_count, boolean do_retry ) { enum pipe_error ret; @@ -107,6 +286,11 @@ retry_draw_arrays( struct svga_context *svga, svga->curr.rast->templ.flatshade, svga->curr.rast->templ.flatshade_first ); + ret = svga_upload_user_buffers( svga, start, count, instance_count ); + + if (ret != PIPE_OK) + goto retry; + ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); if (ret) goto retry; @@ -127,6 +311,7 @@ retry: prim, start, count, + instance_count, FALSE ); } @@ -141,18 +326,11 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) unsigned reduced_prim = u_reduced_prim( info->mode ); unsigned count = info->count; enum pipe_error ret = 0; + boolean needed_swtnl; if (!u_trim_pipe_prim( info->mode, &count )) return; - if (svga->state.sw.need_swtnl != svga->prev_draw_swtnl) { - /* We're switching between SW and HW drawing. Do a flush to avoid - * mixing HW and SW rendering with the same vertex buffer. - */ - pipe->flush(pipe, NULL); - svga->prev_draw_swtnl = svga->state.sw.need_swtnl; - } - /* * Mark currently bound target surfaces as dirty * doesn't really matter if it is done before drawing. @@ -167,6 +345,8 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE; } + needed_swtnl = svga->state.sw.need_swtnl; + svga_update_state_retry( svga, SVGA_STATE_NEED_SWTNL ); #ifdef DEBUG @@ -176,6 +356,20 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) #endif if (svga->state.sw.need_swtnl) { + if (!needed_swtnl) { + /* + * We're switching from HW to SW TNL. SW TNL will require mapping all + * currently bound vertex buffers, some of which may already be + * referenced in the current command buffer as result of previous HW + * TNL. So flush now, to prevent the context to flush while a referred + * vertex buffer is mapped. + */ + + svga_context_flush(svga, NULL); + } + + /* Avoid leaking the previous hwtnl bias to swtnl */ + svga_hwtnl_set_index_bias( svga->hwtnl, 0 ); ret = svga_swtnl_draw_vbo( svga, info ); } else { @@ -194,6 +388,7 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start + offset, info->count, + info->instance_count, TRUE ); } else { @@ -201,10 +396,13 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start, info->count, + info->instance_count, TRUE ); } } + svga_release_user_upl_buffers( svga ); + if (SVGA_DEBUG & DEBUG_FLUSH) { svga_hwtnl_flush_retry( svga ); svga_context_flush(svga, NULL); diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h index 95032213fa5..ca8c8d1f5ea 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.h +++ b/src/gallium/drivers/svga/svga_resource_buffer.h @@ -129,6 +129,12 @@ struct svga_buffer * is the relative offset within that buffer. */ unsigned offset; + + /** + * Range of user buffer that is uploaded in @buffer at @offset. + */ + unsigned start; + unsigned end; } uploaded; /** @@ -193,7 +199,11 @@ svga_buffer(struct pipe_resource *buffer) static INLINE boolean svga_buffer_is_user_buffer( struct pipe_resource *buffer ) { - return svga_buffer(buffer)->user; + if (buffer) { + return svga_buffer(buffer)->user; + } else { + return FALSE; + } } diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 923958674b4..a657a8bc224 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -651,8 +651,6 @@ svga_redefine_user_buffer(struct pipe_context *pipe, unsigned offset, unsigned size) { - struct svga_screen *ss = svga_screen(pipe->screen); - struct svga_context *svga = svga_context(pipe); struct svga_buffer *sbuf = svga_buffer(resource); assert(sbuf->user); @@ -661,19 +659,8 @@ svga_redefine_user_buffer(struct pipe_context *pipe, assert(!sbuf->hwbuf); /* - * Release any uploaded user buffer. - * - * TODO: As an optimization, we could try to update the uploaded buffer - * instead. + * We always treat the contents of user-buffers as volatile, + * so no particular action needed here. */ - pipe_resource_reference(&sbuf->uploaded.buffer, NULL); - - pipe_mutex_lock(ss->swc_mutex); - - sbuf->key.size.width = sbuf->b.b.width0 = offset + size; - - pipe_mutex_unlock(ss->swc_mutex); - - svga->dirty |= SVGA_NEW_VBUFFER | SVGA_NEW_VELEMENT; } diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c index 7c393a1da8d..47eab1a9739 100644 --- a/src/gallium/drivers/svga/svga_state_vdecl.c +++ b/src/gallium/drivers/svga/svga_state_vdecl.c @@ -38,57 +38,6 @@ #include "svga_hw_reg.h" -static int -upload_user_buffers( struct svga_context *svga ) -{ - enum pipe_error ret = PIPE_OK; - int i; - int nr; - - if (0) - debug_printf("%s: %d\n", __FUNCTION__, svga->curr.num_vertex_buffers); - - nr = svga->curr.num_vertex_buffers; - - for (i = 0; i < nr; i++) - { - if (svga_buffer_is_user_buffer(svga->curr.vb[i].buffer)) - { - struct svga_buffer *buffer = svga_buffer(svga->curr.vb[i].buffer); - - if (!buffer->uploaded.buffer) { - boolean flushed; - ret = u_upload_buffer( svga->upload_vb, - 0, 0, - buffer->b.b.width0, - &buffer->b.b, - &buffer->uploaded.offset, - &buffer->uploaded.buffer, - &flushed); - if (ret) - return ret; - - if (0) - debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n", - __FUNCTION__, - i, - buffer, - buffer->uploaded.buffer, - buffer->uploaded.offset, - buffer->b.b.width0); - } - - svga->curr.vb[i].buffer_offset = buffer->uploaded.offset; - } - } - - if (0) - debug_printf("%s: DONE\n", __FUNCTION__); - - return ret; -} - - /*********************************************************************** */ @@ -99,6 +48,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, const struct pipe_vertex_element *ve = svga->curr.velems->velem; SVGA3dVertexDecl decl; unsigned i; + unsigned neg_bias = 0; assert(svga->curr.velems->count >= svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]); @@ -106,12 +56,50 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, svga_hwtnl_reset_vdecl( svga->hwtnl, svga->curr.velems->count ); + /** + * We can't set the VDECL offset to something negative, so we + * must calculate a common negative additional index bias, and modify + * the VDECL offsets accordingly so they *all* end up positive. + * + * Note that the exact value of the negative index bias is not that + * important, since we compensate for it when we calculate the vertex + * buffer offset below. The important thing is that all vertex buffer + * offsets remain positive. + * + * Note that we use a negative bias variable in order to make the + * rounding maths more easy to follow, and to avoid int / unsigned + * confusion. + */ + for (i = 0; i < svga->curr.velems->count; i++) { - const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index]; + const struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; + struct svga_buffer *buffer; + unsigned int offset = vb->buffer_offset + ve[i].src_offset; + unsigned tmp_neg_bias = 0; + + if (!vb->buffer) + continue; + + buffer = svga_buffer(vb->buffer); + if (buffer->uploaded.start > offset) { + tmp_neg_bias = buffer->uploaded.start - offset; + if (vb->stride) + tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride; + neg_bias = MAX2(neg_bias, tmp_neg_bias); + } + } + + for (i = 0; i < svga->curr.velems->count; i++) { + const struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; unsigned usage, index; - struct svga_buffer *buffer = svga_buffer(vb->buffer); + struct svga_buffer *buffer; + if (!vb->buffer) + continue; + buffer= svga_buffer(vb->buffer); svga_generate_vdecl_semantics( i, &usage, &index ); /* SVGA_NEW_VELEMENT @@ -121,8 +109,16 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, decl.identity.usage = usage; decl.identity.usageIndex = index; decl.array.stride = vb->stride; - decl.array.offset = (vb->buffer_offset + - ve[i].src_offset); + + /* Compensate for partially uploaded vbo, and + * for the negative index bias. + */ + decl.array.offset = (vb->buffer_offset + + ve[i].src_offset + + neg_bias * vb->stride + - buffer->uploaded.start); + + assert(decl.array.offset >= 0); svga_hwtnl_vdecl( svga->hwtnl, i, @@ -131,6 +127,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, vb->buffer ); } + svga_hwtnl_set_index_bias( svga->hwtnl, -neg_bias ); return 0; } @@ -138,23 +135,11 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, static int emit_hw_vdecl( struct svga_context *svga, unsigned dirty ) { - int ret = 0; - /* SVGA_NEW_NEED_SWTNL */ if (svga->state.sw.need_swtnl) return 0; /* Do not emit during swtnl */ - /* If we get to here, we know that we're going to draw. Upload - * userbuffers now and try to combine multiple userbuffers from - * multiple draw calls into a single host buffer for performance. - */ - if (svga->curr.any_user_vertex_buffers) { - ret = upload_user_buffers( svga ); - if (ret) - return ret; - } - return emit_hw_vs_vdecl( svga, dirty ); } |