summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/i915/Makefile1
-rw-r--r--src/gallium/drivers/i915/SConscript1
-rw-r--r--src/gallium/drivers/i915/TODO15
-rw-r--r--src/gallium/drivers/i915/i915_clear.c2
-rw-r--r--src/gallium/drivers/i915/i915_context.c25
-rw-r--r--src/gallium/drivers/i915/i915_context.h3
-rw-r--r--src/gallium/drivers/i915/i915_fpc.h5
-rw-r--r--src/gallium/drivers/i915/i915_fpc_emit.c72
-rw-r--r--src/gallium/drivers/i915/i915_fpc_translate.c134
-rw-r--r--src/gallium/drivers/i915/i915_query.c86
-rw-r--r--src/gallium/drivers/i915/i915_query.h36
-rw-r--r--src/gallium/drivers/i915/i915_reg.h7
-rw-r--r--src/gallium/drivers/i915/i915_resource.c4
-rw-r--r--src/gallium/drivers/i915/i915_resource.h12
-rw-r--r--src/gallium/drivers/i915/i915_resource_texture.c196
-rw-r--r--src/gallium/drivers/i915/i915_screen.c15
-rw-r--r--src/gallium/drivers/i915/i915_screen.h6
-rw-r--r--src/gallium/drivers/i915/i915_state.c14
-rw-r--r--src/gallium/drivers/i915/i915_state_derived.c30
-rw-r--r--src/gallium/drivers/i915/i915_state_emit.c94
-rw-r--r--src/gallium/drivers/i915/i915_state_inlines.h25
-rw-r--r--src/gallium/drivers/i915/i915_state_sampler.c52
-rw-r--r--src/gallium/drivers/i915/i915_state_static.c15
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.py68
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.c14
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.h2
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c8
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h2
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.h2
-rw-r--r--src/gallium/drivers/nv50/nv50_shader_state.c6
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c15
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.c8
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.h2
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.h2
-rw-r--r--src/gallium/drivers/nvc0/nvc0_vbo.c15
-rw-r--r--src/gallium/drivers/nvfx/nvfx_context.c16
-rw-r--r--src/gallium/drivers/nvfx/nvfx_screen.c7
-rw-r--r--src/gallium/drivers/nvfx/nvfx_screen.h1
-rw-r--r--src/gallium/drivers/r300/r300_render.c161
-rw-r--r--src/gallium/drivers/r300/r300_state.c2
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c6
-rw-r--r--src/gallium/drivers/r300/r300_state_inlines.h80
-rw-r--r--src/gallium/drivers/r300/r300_texture.c21
-rw-r--r--src/gallium/drivers/r600/SConscript6
-rw-r--r--src/gallium/drivers/r600/eg_state_inlines.h45
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c25
-rw-r--r--src/gallium/drivers/r600/r600.h2
-rw-r--r--src/gallium/drivers/r600/r600_asm.c7
-rw-r--r--src/gallium/drivers/r600/r600_blit.c49
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c18
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h22
-rw-r--r--src/gallium/drivers/r600/r600_query.c3
-rw-r--r--src/gallium/drivers/r600/r600_shader.c323
-rw-r--r--src/gallium/drivers/r600/r600_shader.h2
-rw-r--r--src/gallium/drivers/r600/r600_state.c150
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c127
-rw-r--r--src/gallium/drivers/r600/r600_state_inlines.h41
-rw-r--r--src/gallium/drivers/r600/r600_texture.c14
-rw-r--r--src/gallium/drivers/r600/r600_translate.c1
-rw-r--r--src/gallium/drivers/r600/r600d.h8
-rw-r--r--src/gallium/drivers/svga/svga_context.c8
-rw-r--r--src/gallium/drivers/svga/svga_context.h3
-rw-r--r--src/gallium/drivers/svga/svga_draw.c17
-rw-r--r--src/gallium/drivers/svga/svga_draw.h3
-rw-r--r--src/gallium/drivers/svga/svga_draw_private.h7
-rw-r--r--src/gallium/drivers/svga/svga_pipe_draw.c216
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer.h12
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer_upload.c17
-rw-r--r--src/gallium/drivers/svga/svga_state_vdecl.c119
71 files changed, 1833 insertions, 704 deletions
diff --git a/src/gallium/drivers/i915/Makefile b/src/gallium/drivers/i915/Makefile
index b3f387f9335..778124728bb 100644
--- a/src/gallium/drivers/i915/Makefile
+++ b/src/gallium/drivers/i915/Makefile
@@ -21,6 +21,7 @@ C_SOURCES = \
i915_screen.c \
i915_prim_emit.c \
i915_prim_vbuf.c \
+ i915_query.c \
i915_resource.c \
i915_resource_texture.c \
i915_resource_buffer.c \
diff --git a/src/gallium/drivers/i915/SConscript b/src/gallium/drivers/i915/SConscript
index 8f5deed64a9..98370601b7f 100644
--- a/src/gallium/drivers/i915/SConscript
+++ b/src/gallium/drivers/i915/SConscript
@@ -16,6 +16,7 @@ i915 = env.ConvenienceLibrary(
'i915_fpc_translate.c',
'i915_prim_emit.c',
'i915_prim_vbuf.c',
+ 'i915_query.c',
'i915_screen.c',
'i915_state.c',
'i915_state_derived.c',
diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO
index fba180064c3..c26db198d20 100644
--- a/src/gallium/drivers/i915/TODO
+++ b/src/gallium/drivers/i915/TODO
@@ -26,5 +26,20 @@ Random list of problems with i915g:
- src/xvmc/i915_structs.h in xf86-video-intel has a few more bits of various
commands defined. Scavenge them and see what's useful.
+- Do smarter remapping. Right now we send everything onto tex coords 0-7.
+ We could also use diffuse/specular and pack two sets of 2D coords in a single
+ 4D. Is it a big problem though? We're more limited by the # of texture
+ indirections and the # of instructions.
+
+- Leverage draw to enable more caps:
+ * PIPE_CAP_TGSI_INSTANCEID
+ * PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS
+
+- Finish front/back face. We need to add face support to lp_build_system_values_array and use it in draw_llvm.c.
+
+- Replace constants and immediates which are 0,1,-1 or a combination of those with a swizzle.
+
+- i915_delete_fs_state doesn't call draw_delete_fragment_shader. Why?
+
Other bugs can be found here:
https://bugs.freedesktop.org/buglist.cgi?bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&component=Drivers/Gallium/i915g
diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c
index 4a97746e981..fcb208d6dae 100644
--- a/src/gallium/drivers/i915/i915_clear.c
+++ b/src/gallium/drivers/i915/i915_clear.c
@@ -66,7 +66,7 @@ i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba,
else
clear_color = (u_color.ui & 0xffff) | (u_color.ui << 16);
- util_pack_color(rgba, PIPE_FORMAT_B8G8R8A8_UNORM, &u_color);
+ util_pack_color(rgba, cbuf->format, &u_color);
clear_color8888 = u_color.ui;
} else
clear_color = clear_color8888 = 0;
diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index 7a98ef73c1f..28ff40a2328 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -29,6 +29,7 @@
#include "i915_state.h"
#include "i915_screen.h"
#include "i915_surface.h"
+#include "i915_query.h"
#include "i915_batch.h"
#include "i915_resource.h"
@@ -53,13 +54,11 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
struct i915_context *i915 = i915_context(pipe);
struct draw_context *draw = i915->draw;
void *mapped_indices = NULL;
- unsigned cbuf_dirty;
/*
* Ack vs contants here, helps ipers a lot.
*/
- cbuf_dirty = i915->dirty & I915_NEW_VS_CONSTANTS;
i915->dirty &= ~I915_NEW_VS_CONSTANTS;
if (i915->dirty)
@@ -72,15 +71,13 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
mapped_indices = i915_buffer(i915->index_buffer.buffer)->data;
draw_set_mapped_index_buffer(draw, mapped_indices);
- if (cbuf_dirty) {
- if (i915->constants[PIPE_SHADER_VERTEX])
- draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0,
- i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data,
- (i915->current.num_user_constants[PIPE_SHADER_VERTEX] *
- 4 * sizeof(float)));
- else
- draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0);
- }
+ if (i915->constants[PIPE_SHADER_VERTEX])
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0,
+ i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data,
+ (i915->current.num_user_constants[PIPE_SHADER_VERTEX] *
+ 4 * sizeof(float)));
+ else
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0);
/*
* Do the drawing
@@ -106,7 +103,7 @@ static void i915_destroy(struct pipe_context *pipe)
if (i915->blitter)
util_blitter_destroy(i915->blitter);
-
+
if(i915->batch)
i915->iws->batchbuffer_destroy(i915->batch);
@@ -150,6 +147,8 @@ i915_create_context(struct pipe_screen *screen, void *priv)
/* init this before draw */
util_slab_create(&i915->transfer_pool, sizeof(struct pipe_transfer),
16, UTIL_SLAB_SINGLETHREADED);
+ util_slab_create(&i915->texture_transfer_pool, sizeof(struct i915_transfer),
+ 16, UTIL_SLAB_SINGLETHREADED);
/* Batch stream debugging is a bit hacked up at the moment:
*/
@@ -170,9 +169,11 @@ i915_create_context(struct pipe_screen *screen, void *priv)
i915_init_state_functions(i915);
i915_init_flush_functions(i915);
i915_init_resource_functions(i915);
+ i915_init_query_functions(i915);
draw_install_aaline_stage(i915->draw, &i915->base);
draw_install_aapoint_stage(i915->draw, &i915->base);
+ draw_enable_point_sprites(i915->draw, TRUE);
/* augmented draw pipeline clobbers state functions */
i915_init_fixup_state_functions(i915);
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 964948edc0e..c964208fedd 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -102,6 +102,8 @@ struct i915_fragment_shader
struct tgsi_shader_info info;
+ struct draw_fragment_shader *draw_data;
+
uint *program;
uint program_len;
@@ -260,6 +262,7 @@ struct i915_context {
int num_validation_buffers;
struct util_slab_mempool transfer_pool;
+ struct util_slab_mempool texture_transfer_pool;
/** blitter/hw-clear */
struct blitter_context* blitter;
diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h
index 2f0f99d0468..509395cf1f5 100644
--- a/src/gallium/drivers/i915/i915_fpc.h
+++ b/src/gallium/drivers/i915/i915_fpc.h
@@ -37,6 +37,9 @@
#define I915_PROGRAM_SIZE 192
+/* Use those indices for pos/face routing, must be >= I915_TEX_UNITS */
+#define I915_SEMANTIC_POS 10
+#define I915_SEMANTIC_FACE 11
/**
@@ -67,13 +70,13 @@ struct i915_fp_compile {
uint temp_flag; /**< Tracks temporary regs which are in use */
uint utemp_flag; /**< Tracks TYPE_U temporary regs which are in use */
+ uint register_phases[16];
uint nr_tex_indirect;
uint nr_tex_insn;
uint nr_alu_insn;
uint nr_decl_insn;
boolean error; /**< Set if i915_program_error() is called */
- uint wpos_tex;
uint NumNativeInstructions;
uint NumNativeAluInstructions;
uint NumNativeTexInstructions;
diff --git a/src/gallium/drivers/i915/i915_fpc_emit.c b/src/gallium/drivers/i915/i915_fpc_emit.c
index 76c24d2b2fd..d28595e0fd3 100644
--- a/src/gallium/drivers/i915/i915_fpc_emit.c
+++ b/src/gallium/drivers/i915/i915_fpc_emit.c
@@ -67,7 +67,7 @@ i915_get_temp(struct i915_fp_compile *p)
{
int bit = ffs(~p->temp_flag);
if (!bit) {
- i915_program_error(p, "i915_get_temp: out of temporaries\n");
+ i915_program_error(p, "i915_get_temp: out of temporaries");
return 0;
}
@@ -92,7 +92,7 @@ i915_get_utemp(struct i915_fp_compile * p)
{
int bit = ffs(~p->utemp_flag);
if (!bit) {
- i915_program_error(p, "i915_get_utemp: out of temporaries\n");
+ i915_program_error(p, "i915_get_utemp: out of temporaries");
return 0;
}
@@ -128,9 +128,13 @@ i915_emit_decl(struct i915_fp_compile *p,
else
return reg;
- *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags);
- *(p->decl++) = D1_MBZ;
- *(p->decl++) = D2_MBZ;
+ if (p->decl< p->declarations + I915_PROGRAM_SIZE) {
+ *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags);
+ *(p->decl++) = D1_MBZ;
+ *(p->decl++) = D2_MBZ;
+ }
+ else
+ i915_program_error(p, "Out of declarations");
p->nr_decl_insn++;
return reg;
@@ -187,9 +191,16 @@ i915_emit_arith(struct i915_fp_compile * p,
p->utemp_flag = old_utemp_flag; /* restore */
}
- *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0));
- *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1));
- *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2));
+ if (p->csr< p->program + I915_PROGRAM_SIZE) {
+ *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0));
+ *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1));
+ *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2));
+ }
+ else
+ i915_program_error(p, "Out of instructions");
+
+ if (GET_UREG_TYPE(dest) == REG_TYPE_R)
+ p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect;
p->nr_alu_insn++;
return dest;
@@ -245,17 +256,31 @@ uint i915_emit_texld( struct i915_fp_compile *p,
assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
assert(dest == UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)));
- /* is the sampler coord a texcoord input reg? */
- if (GET_UREG_TYPE(coord) != REG_TYPE_T) {
- p->nr_tex_indirect++;
- }
+ /* Output register being oC or oD defines a phase boundary */
+ if (GET_UREG_TYPE(dest) == REG_TYPE_OC ||
+ GET_UREG_TYPE(dest) == REG_TYPE_OD)
+ p->nr_tex_indirect++;
- *(p->csr++) = (opcode |
- T0_DEST( dest ) |
- T0_SAMPLER( sampler ));
+ /* Reading from an r# register whose contents depend on output of the
+ * current phase defines a phase boundary.
+ */
+ if (GET_UREG_TYPE(coord) == REG_TYPE_R &&
+ p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect)
+ p->nr_tex_indirect++;
+
+ if (p->csr< p->program + I915_PROGRAM_SIZE) {
+ *(p->csr++) = (opcode |
+ T0_DEST( dest ) |
+ T0_SAMPLER( sampler ));
+
+ *(p->csr++) = T1_ADDRESS_REG( coord );
+ *(p->csr++) = T2_MBZ;
+ }
+ else
+ i915_program_error(p, "Out of instructions");
- *(p->csr++) = T1_ADDRESS_REG( coord );
- *(p->csr++) = T2_MBZ;
+ if (GET_UREG_TYPE(dest) == REG_TYPE_R)
+ p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect;
p->nr_tex_insn++;
}
@@ -293,7 +318,7 @@ i915_emit_const1f(struct i915_fp_compile * p, float c0)
}
}
- i915_program_error(p, "i915_emit_const1f: out of constants\n");
+ i915_program_error(p, "i915_emit_const1f: out of constants");
return 0;
}
@@ -313,6 +338,8 @@ i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1)
if (c1 == 1.0)
return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);
+ // XXX emit swizzle here for 0, 1, -1 and any combination thereof
+ // we can use swizzle + neg for that
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
if (ifs->constant_flags[reg] == 0xf ||
ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
@@ -329,12 +356,10 @@ i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1)
}
}
- i915_program_error(p, "i915_emit_const2f: out of constants\n");
+ i915_program_error(p, "i915_emit_const2f: out of constants");
return 0;
}
-
-
uint
i915_emit_const4f(struct i915_fp_compile * p,
float c0, float c1, float c2, float c3)
@@ -342,6 +367,9 @@ i915_emit_const4f(struct i915_fp_compile * p,
struct i915_fragment_shader *ifs = p->shader;
unsigned reg;
+ // XXX emit swizzle here for 0, 1, -1 and any combination thereof
+ // we can use swizzle + neg for that
+ printf("const %f %f %f %f\n",c0,c1,c2,c3);
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
if (ifs->constant_flags[reg] == 0xf &&
ifs->constants[reg][0] == c0 &&
@@ -363,7 +391,7 @@ i915_emit_const4f(struct i915_fp_compile * p,
}
}
- i915_program_error(p, "i915_emit_const4f: out of constants\n");
+ i915_program_error(p, "i915_emit_const4f: out of constants");
return 0;
}
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 27f100843bf..0cbd4f2d748 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -41,6 +41,9 @@
#include "draw/draw_vertex.h"
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
/**
* Simple pass-through fragment shader to use when we don't have
@@ -72,19 +75,33 @@ static unsigned passthrough[] =
/* 1, -1/3!, 1/5!, -1/7! */
-static const float sin_constants[4] = { 1.0,
+static const float scs_sin_constants[4] = { 1.0,
-1.0f / (3 * 2 * 1),
1.0f / (5 * 4 * 3 * 2 * 1),
-1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1)
};
/* 1, -1/2!, 1/4!, -1/6! */
-static const float cos_constants[4] = { 1.0,
+static const float scs_cos_constants[4] = { 1.0,
-1.0f / (2 * 1),
1.0f / (4 * 3 * 2 * 1),
-1.0f / (6 * 5 * 4 * 3 * 2 * 1)
};
+/* 2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7! */
+static const float sin_constants[4] = { 2.0 * M_PI,
+ -8.0f * M_PI * M_PI * M_PI / (3 * 2 * 1),
+ 32.0f * M_PI * M_PI * M_PI * M_PI * M_PI / (5 * 4 * 3 * 2 * 1),
+ -128.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (7 * 6 * 5 * 4 * 3 * 2 * 1)
+};
+
+/* 1, -(2*pi)^2/2!, (2*pi)^4/4!, -(2*pi)^6/6! */
+static const float cos_constants[4] = { 1.0,
+ -4.0f * M_PI * M_PI / (2 * 1),
+ 16.0f * M_PI * M_PI * M_PI * M_PI / (4 * 3 * 2 * 1),
+ -64.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (6 * 5 * 4 * 3 * 2 * 1)
+};
+
/**
@@ -185,12 +202,12 @@ src_vector(struct i915_fp_compile *p,
switch (sem_name) {
case TGSI_SEMANTIC_POSITION:
- debug_printf("SKIP SEM POS\n");
- /*
- assert(p->wpos_tex != -1);
- src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
- */
- break;
+ {
+ /* for fragcoord */
+ int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS);
+ src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL);
+ break;
+ }
case TGSI_SEMANTIC_COLOR:
if (sem_ind == 0) {
src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
@@ -212,6 +229,13 @@ src_vector(struct i915_fp_compile *p,
src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL);
break;
}
+ case TGSI_SEMANTIC_FACE:
+ {
+ /* for back/front faces */
+ int real_tex_unit = get_mapping(fs, I915_SEMANTIC_FACE);
+ src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X);
+ break;
+ }
default:
i915_program_error(p, "Bad source->Index");
return 0;
@@ -237,7 +261,6 @@ src_vector(struct i915_fp_compile *p,
source->Register.SwizzleZ,
source->Register.SwizzleW);
-
/* There's both negate-all-components and per-component negation.
* Try to handle both here.
*/
@@ -252,6 +275,9 @@ src_vector(struct i915_fp_compile *p,
/* XXX enable these assertions, or fix things */
assert(!source->Register.Absolute);
#endif
+ if (source->Register.Absolute)
+ debug_printf("Unhandled absolute value\n");
+
return src;
}
@@ -419,11 +445,6 @@ emit_simple_arith_swap2(struct i915_fp_compile *p,
emit_simple_arith(p, &inst2, opcode, numArgs, fs);
}
-
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-
/*
* Translate TGSI instruction to i915 instruction.
*
@@ -477,13 +498,6 @@ i915_translate_instruction(struct i915_fp_compile *p,
i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
- /* By choosing different taylor constants, could get rid of this mul:
- */
- i915_emit_arith(p,
- A0_MUL,
- tmp, A0_DEST_CHANNEL_X, 0,
- tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0);
-
/*
* t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
* t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
@@ -516,6 +530,18 @@ i915_translate_instruction(struct i915_fp_compile *p,
i915_emit_const4fv(p, cos_constants), 0);
break;
+ case TGSI_OPCODE_DDX:
+ case TGSI_OPCODE_DDY:
+ /* XXX We just output 0 here */
+ debug_printf("Punting DDX/DDX\n");
+ src0 = get_result_vector(p, &inst->Dst[0]);
+ i915_emit_arith(p,
+ A0_MOV,
+ get_result_vector(p, &inst->Dst[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);
+ break;
+
case TGSI_OPCODE_DP2:
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
@@ -754,9 +780,9 @@ i915_translate_instruction(struct i915_fp_compile *p,
* t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
* t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
* t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
- * scs.x = DP4 t1, sin_constants
+ * scs.x = DP4 t1, scs_sin_constants
* t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
- * scs.y = DP4 t1, cos_constants
+ * scs.y = DP4 t1, scs_cos_constants
*/
i915_emit_arith(p,
A0_MUL,
@@ -791,7 +817,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
get_result_vector(p, &inst->Dst[0]),
A0_DEST_CHANNEL_Y, 0,
swizzle(tmp1, W, Z, Y, X),
- i915_emit_const4fv(p, sin_constants), 0);
+ i915_emit_const4fv(p, scs_sin_constants), 0);
}
if (writemask & TGSI_WRITEMASK_X) {
@@ -806,7 +832,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
get_result_vector(p, &inst->Dst[0]),
A0_DEST_CHANNEL_X, 0,
swizzle(tmp, ONE, Z, Y, X),
- i915_emit_const4fv(p, cos_constants), 0);
+ i915_emit_const4fv(p, scs_cos_constants), 0);
}
break;
@@ -853,13 +879,6 @@ i915_translate_instruction(struct i915_fp_compile *p,
i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
- /* By choosing different taylor constants, could get rid of this mul:
- */
- i915_emit_arith(p,
- A0_MUL,
- tmp, A0_DEST_CHANNEL_X, 0,
- tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0);
-
/*
* t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
* t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
@@ -907,7 +926,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_SNE:
- /* if we're neither < nor > then we're != */
+ /* if we're < or > then we're != */
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
tmp = i915_get_utemp(p);
@@ -1070,9 +1089,11 @@ i915_translate_instructions(struct i915_fp_compile *p,
for (i = parse.FullToken.FullDeclaration.Range.First;
i <= parse.FullToken.FullDeclaration.Range.Last;
i++) {
- assert(i < I915_MAX_TEMPORARY);
- /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
- p->temp_flag |= (1 << i); /* mark temp as used */
+ if (i >= I915_MAX_TEMPORARY)
+ debug_printf("Too many temps (%d)\n",i);
+ else
+ /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
+ p->temp_flag |= (1 << i); /* mark temp as used */
}
}
break;
@@ -1144,6 +1165,8 @@ i915_init_compile(struct i915_context *i915,
ifs->num_constants = 0;
memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
+ memset(&p->register_phases, 0, sizeof(p->register_phases));
+
for (i = 0; i < I915_TEX_UNITS; i++)
ifs->generic_mapping[i] = -1;
@@ -1161,8 +1184,6 @@ i915_init_compile(struct i915_context *i915,
p->temp_flag = ~0x0 << I915_MAX_TEMPORARY;
p->utemp_flag = ~0x7;
- p->wpos_tex = -1;
-
/* initialize the first program word */
*(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
@@ -1181,7 +1202,7 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
unsigned long decl_size = (unsigned long) (p->decl - p->declarations);
if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
- i915_program_error(p, "Exceeded max nr indirect texture lookups");
+ debug_printf("Exceeded max nr indirect texture lookups\n");
if (p->nr_tex_insn > I915_MAX_TEX_INSN)
i915_program_error(p, "Exceeded max TEX instructions");
@@ -1234,40 +1255,6 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
}
-/**
- * Find an unused texture coordinate slot to use for fragment WPOS.
- * Update p->fp->wpos_tex with the result (-1 if no used texcoord slot is found).
- */
-static void
-i915_find_wpos_space(struct i915_fp_compile *p)
-{
-#if 0
- const uint inputs
- = p->shader->inputs_read | (1 << TGSI_ATTRIB_POS); /*XXX hack*/
- uint i;
-
- p->wpos_tex = -1;
-
- if (inputs & (1 << TGSI_ATTRIB_POS)) {
- for (i = 0; i < I915_TEX_UNITS; i++) {
- if ((inputs & (1 << (TGSI_ATTRIB_TEX0 + i))) == 0) {
- p->wpos_tex = i;
- return;
- }
- }
-
- i915_program_error(p, "No free texcoord for wpos value");
- }
-#else
- if (p->shader->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
- /* frag shader using the fragment position input */
-#if 0
- assert(0);
-#endif
- }
-#endif
-}
-
@@ -1314,7 +1301,6 @@ i915_translate_fragment_program( struct i915_context *i915,
}
p = i915_init_compile(i915, fs);
- i915_find_wpos_space(p);
i915_translate_instructions(p, tokens, fs);
i915_fixup_depth_write(p);
diff --git a/src/gallium/drivers/i915/i915_query.c b/src/gallium/drivers/i915/i915_query.c
new file mode 100644
index 00000000000..c886df74bad
--- /dev/null
+++ b/src/gallium/drivers/i915/i915_query.c
@@ -0,0 +1,86 @@
+/**************************************************************************
+ *
+ * Copyright 2011 The Chromium OS authors.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Fake occlusion queries which return 0, it's better than crashing */
+
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+
+#include "i915_context.h"
+#include "i915_query.h"
+
+struct i915_query
+{
+ unsigned query;
+};
+
+static struct pipe_query *i915_create_query(struct pipe_context *ctx,
+ unsigned query_type)
+{
+ struct i915_query *query = CALLOC_STRUCT( i915_query );
+
+ return (struct pipe_query *)query;
+}
+
+static void i915_destroy_query(struct pipe_context *ctx,
+ struct pipe_query *query)
+{
+ FREE(query);
+}
+
+static void i915_begin_query(struct pipe_context *ctx,
+ struct pipe_query *query)
+{
+}
+
+static void i915_end_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+}
+
+static boolean i915_get_query_result(struct pipe_context *ctx,
+ struct pipe_query *query,
+ boolean wait,
+ void *vresult)
+{
+ uint64_t *result = (uint64_t*)vresult;
+
+ /* 2* viewport Max */
+ *result = 512*1024*1024;
+ return TRUE;
+}
+
+void
+i915_init_query_functions(struct i915_context *i915)
+{
+ i915->base.create_query = i915_create_query;
+ i915->base.destroy_query = i915_destroy_query;
+ i915->base.begin_query = i915_begin_query;
+ i915->base.end_query = i915_end_query;
+ i915->base.get_query_result = i915_get_query_result;
+}
+
diff --git a/src/gallium/drivers/i915/i915_query.h b/src/gallium/drivers/i915/i915_query.h
new file mode 100644
index 00000000000..2c689ea6b1c
--- /dev/null
+++ b/src/gallium/drivers/i915/i915_query.h
@@ -0,0 +1,36 @@
+/**************************************************************************
+ *
+ * Copyright 2011 The Chromium OS authors.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef I915_QUERY_H
+#define I915_QUERY_H
+
+struct i915_context;
+struct pipe_context;
+
+void i915_init_query_functions( struct i915_context *i915 );
+
+#endif /* I915_QUERY_H */
diff --git a/src/gallium/drivers/i915/i915_reg.h b/src/gallium/drivers/i915/i915_reg.h
index 6fe032cdb6e..14e786d0f2a 100644
--- a/src/gallium/drivers/i915/i915_reg.h
+++ b/src/gallium/drivers/i915/i915_reg.h
@@ -170,6 +170,13 @@
#define COLOR_BUF_RGB555 (1<<8)
#define COLOR_BUF_RGB565 (2<<8)
#define COLOR_BUF_ARGB8888 (3<<8)
+#define COLOR_BUF_YCRCB_SWAP (4<<8)
+#define COLOR_BUF_YCRCB_NORMAL (5<<8)
+#define COLOR_BUF_YCRCB_SWAPUV (6<<8)
+#define COLOR_BUF_YCRCB_SWAPUVY (7<<8)
+#define COLOR_BUF_ARGB4444 (8<<8)
+#define COLOR_BUF_ARGB1555 (9<<8)
+#define COLOR_BUF_ARGB2101010 (10<<8)
#define DEPTH_FRMT_16_FIXED 0
#define DEPTH_FRMT_16_FLOAT (1<<2)
#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2)
diff --git a/src/gallium/drivers/i915/i915_resource.c b/src/gallium/drivers/i915/i915_resource.c
index 7f52ba11d61..b4719af1fb6 100644
--- a/src/gallium/drivers/i915/i915_resource.c
+++ b/src/gallium/drivers/i915/i915_resource.c
@@ -7,12 +7,12 @@
static struct pipe_resource *
i915_resource_create(struct pipe_screen *screen,
- const struct pipe_resource *template)
+ const struct pipe_resource *template)
{
if (template->target == PIPE_BUFFER)
return i915_buffer_create(screen, template);
else
- return i915_texture_create(screen, template);
+ return i915_texture_create(screen, template, FALSE);
}
diff --git a/src/gallium/drivers/i915/i915_resource.h b/src/gallium/drivers/i915/i915_resource.h
index c15ecdfc22a..14eed2c4a79 100644
--- a/src/gallium/drivers/i915/i915_resource.h
+++ b/src/gallium/drivers/i915/i915_resource.h
@@ -45,6 +45,15 @@ struct i915_buffer {
boolean free_on_destroy;
};
+
+/* Texture transfer. */
+struct i915_transfer {
+ /* Base class. */
+ struct pipe_transfer b;
+ struct pipe_resource *staging_texture;
+};
+
+
#define I915_MAX_TEXTURE_2D_LEVELS 12 /* max 2048x2048 */
#define I915_MAX_TEXTURE_3D_LEVELS 9 /* max 256x256x256 */
@@ -101,7 +110,8 @@ static INLINE struct i915_buffer *i915_buffer(struct pipe_resource *resource)
struct pipe_resource *
i915_texture_create(struct pipe_screen *screen,
- const struct pipe_resource *template);
+ const struct pipe_resource *template,
+ boolean force_untiled);
struct pipe_resource *
i915_texture_from_handle(struct pipe_screen * screen,
diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c
index b74b19d0fe4..0b6424f8d16 100644
--- a/src/gallium/drivers/i915/i915_resource_texture.c
+++ b/src/gallium/drivers/i915/i915_resource_texture.c
@@ -37,6 +37,7 @@
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_rect.h"
#include "i915_context.h"
#include "i915_resource.h"
@@ -710,7 +711,7 @@ i915_texture_destroy(struct pipe_screen *screen,
FREE(tex);
}
-static struct pipe_transfer *
+static struct pipe_transfer *
i915_texture_get_transfer(struct pipe_context *pipe,
struct pipe_resource *resource,
unsigned level,
@@ -719,19 +720,45 @@ i915_texture_get_transfer(struct pipe_context *pipe,
{
struct i915_context *i915 = i915_context(pipe);
struct i915_texture *tex = i915_texture(resource);
- struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool);
+ struct i915_transfer *transfer = util_slab_alloc(&i915->texture_transfer_pool);
+ boolean use_staging_texture = FALSE;
if (transfer == NULL)
return NULL;
- transfer->resource = resource;
- transfer->level = level;
- transfer->usage = usage;
- transfer->box = *box;
- transfer->stride = tex->stride;
- /* FIXME: layer_stride */
+ transfer->b.resource = resource;
+ transfer->b.level = level;
+ transfer->b.usage = usage;
+ transfer->b.box = *box;
+ transfer->b.stride = tex->stride;
+ transfer->staging_texture = NULL;
+ /* XXX: handle depth textures everyhwere*/
+ transfer->b.layer_stride = 0;
+ transfer->b.data = NULL;
+
+ /* if we use staging transfers, only support textures we can render to,
+ * because we need that for u_blitter */
+ if (i915->blitter &&
+ i915_is_format_supported(NULL, /* screen */
+ transfer->b.resource->format,
+ 0, /* target */
+ 1, /* sample count */
+ PIPE_BIND_RENDER_TARGET) &&
+ (usage & PIPE_TRANSFER_WRITE) &&
+ !(usage & (PIPE_TRANSFER_READ | PIPE_TRANSFER_DONTBLOCK | PIPE_TRANSFER_UNSYNCHRONIZED)))
+ use_staging_texture = TRUE;
+
+ use_staging_texture = FALSE;
+
+ if (use_staging_texture) {
+ /*
+ * Allocate the untiled staging texture.
+ * If the alloc fails, transfer->staging_texture is NULL and we fallback to a map()
+ */
+ transfer->staging_texture = i915_texture_create(pipe->screen, resource, TRUE);
+ }
- return transfer;
+ return (struct pipe_transfer*)transfer;
}
static void
@@ -739,17 +766,33 @@ i915_transfer_destroy(struct pipe_context *pipe,
struct pipe_transfer *transfer)
{
struct i915_context *i915 = i915_context(pipe);
- util_slab_free(&i915->transfer_pool, transfer);
+ struct i915_transfer *itransfer = (struct i915_transfer*)transfer;
+
+ if ((itransfer->staging_texture) &&
+ (transfer->usage & PIPE_TRANSFER_WRITE)) {
+ struct pipe_box sbox;
+
+ u_box_origin_2d(itransfer->b.box.width, itransfer->b.box.height, &sbox);
+ pipe->resource_copy_region(pipe, itransfer->b.resource, itransfer->b.level,
+ itransfer->b.box.x, itransfer->b.box.y, itransfer->b.box.z,
+ itransfer->staging_texture,
+ 0, &sbox);
+ pipe->flush(pipe, NULL);
+ pipe_resource_reference(&itransfer->staging_texture, NULL);
+ }
+
+ util_slab_free(&i915->texture_transfer_pool, itransfer);
}
static void *
i915_texture_transfer_map(struct pipe_context *pipe,
struct pipe_transfer *transfer)
{
- struct pipe_resource *resource = transfer->resource;
- struct i915_texture *tex = i915_texture(resource);
+ struct i915_transfer *itransfer = (struct i915_transfer*)transfer;
+ struct pipe_resource *resource = itransfer->b.resource;
+ struct i915_texture *tex = NULL;
struct i915_winsys *iws = i915_screen(pipe->screen)->iws;
- struct pipe_box *box = &transfer->box;
+ struct pipe_box *box = &itransfer->b.box;
enum pipe_format format = resource->format;
unsigned offset;
char *map;
@@ -757,18 +800,25 @@ i915_texture_transfer_map(struct pipe_context *pipe,
if (resource->target != PIPE_TEXTURE_3D &&
resource->target != PIPE_TEXTURE_CUBE)
assert(box->z == 0);
- offset = i915_texture_offset(tex, transfer->level, box->z);
- /* TODO this is a sledgehammer */
- pipe->flush(pipe, NULL);
+ if (itransfer->staging_texture) {
+ tex = i915_texture(itransfer->staging_texture);
+ } else {
+ /* TODO this is a sledgehammer */
+ tex = i915_texture(resource);
+ pipe->flush(pipe, NULL);
+ }
+
+ offset = i915_texture_offset(tex, itransfer->b.level, box->z);
map = iws->buffer_map(iws, tex->buffer,
- (transfer->usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE);
- if (map == NULL)
+ (itransfer->b.usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE);
+ if (map == NULL) {
return NULL;
+ }
return map + offset +
- box->y / util_format_get_blockheight(format) * transfer->stride +
+ box->y / util_format_get_blockheight(format) * itransfer->b.stride +
box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
}
@@ -776,14 +826,106 @@ static void
i915_texture_transfer_unmap(struct pipe_context *pipe,
struct pipe_transfer *transfer)
{
- struct i915_texture *tex = i915_texture(transfer->resource);
+ struct i915_transfer *itransfer = (struct i915_transfer*)transfer;
+ struct i915_texture *tex = i915_texture(itransfer->b.resource);
struct i915_winsys *iws = i915_screen(tex->b.b.screen)->iws;
+
+ if (itransfer->staging_texture)
+ tex = i915_texture(itransfer->staging_texture);
+
iws->buffer_unmap(iws, tex->buffer);
}
+static void i915_transfer_inline_write( struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned layer_stride)
+{
+ struct pipe_transfer *transfer = NULL;
+ struct i915_transfer *itransfer = NULL;
+ const uint8_t *src_data = data;
+ unsigned i;
+
+ transfer = pipe->get_transfer(pipe,
+ resource,
+ level,
+ usage,
+ box );
+ if (transfer == NULL)
+ goto out;
+
+ itransfer = (struct i915_transfer*)transfer;
+
+ if (itransfer->staging_texture) {
+ struct i915_texture *tex = i915_texture(itransfer->staging_texture);
+ enum pipe_format format = tex->b.b.format;
+ struct i915_winsys *iws = i915_screen(tex->b.b.screen)->iws;
+ size_t offset;
+ size_t size;
+
+ offset = i915_texture_offset(tex, transfer->level, transfer->box.z);
+
+ for (i = 0; i < box->depth; i++) {
+ if (!tex->b.b.last_level &&
+ tex->b.b.width0 == transfer->box.width) {
+ unsigned nby = util_format_get_nblocksy(format, transfer->box.y);
+ assert(!offset);
+ assert(!transfer->box.x);
+ assert(tex->stride == transfer->stride);
+
+ offset += tex->stride * nby;
+ size = util_format_get_2d_size(format, transfer->stride,
+ transfer->box.height);
+ iws->buffer_write(iws, tex->buffer, offset, size, transfer->data);
+
+ } else {
+ unsigned nby = util_format_get_nblocksy(format, transfer->box.y);
+ int i;
+ offset += util_format_get_stride(format, transfer->box.x);
+ size = transfer->stride;
+
+ for (i = 0; i < nby; i++) {
+ iws->buffer_write(iws, tex->buffer, offset, size, transfer->data);
+ offset += tex->stride;
+ }
+ }
+ offset += layer_stride;
+ }
+ } else {
+ uint8_t *map = pipe_transfer_map(pipe, &itransfer->b);
+ if (map == NULL)
+ goto nomap;
+
+ for (i = 0; i < box->depth; i++) {
+ util_copy_rect(map,
+ resource->format,
+ itransfer->b.stride, /* bytes */
+ 0, 0,
+ box->width,
+ box->height,
+ src_data,
+ stride, /* bytes */
+ 0, 0);
+ map += itransfer->b.layer_stride;
+ src_data += layer_stride;
+ }
+nomap:
+ if (map)
+ pipe_transfer_unmap(pipe, &itransfer->b);
+ }
+
+out:
+ if (itransfer)
+ pipe_transfer_destroy(pipe, &itransfer->b);
+}
-struct u_resource_vtbl i915_texture_vtbl =
+
+struct u_resource_vtbl i915_texture_vtbl =
{
i915_texture_get_handle, /* get_handle */
i915_texture_destroy, /* resource_destroy */
@@ -792,7 +934,7 @@ struct u_resource_vtbl i915_texture_vtbl =
i915_texture_transfer_map, /* transfer_map */
u_default_transfer_flush_region, /* transfer_flush_region */
i915_texture_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
+ i915_transfer_inline_write /* transfer_inline_write */
};
@@ -800,7 +942,8 @@ struct u_resource_vtbl i915_texture_vtbl =
struct pipe_resource *
i915_texture_create(struct pipe_screen *screen,
- const struct pipe_resource *template)
+ const struct pipe_resource *template,
+ boolean force_untiled)
{
struct i915_screen *is = i915_screen(screen);
struct i915_winsys *iws = is->iws;
@@ -815,7 +958,10 @@ i915_texture_create(struct pipe_screen *screen,
pipe_reference_init(&tex->b.b.reference, 1);
tex->b.b.screen = screen;
- tex->tiling = i915_texture_tiling(is, tex);
+ if (force_untiled)
+ tex->tiling = I915_TILE_NONE;
+ else
+ tex->tiling = i915_texture_tiling(is, tex);
if (is->is_i945) {
if (!i945_texture_layout(tex))
@@ -836,7 +982,7 @@ i915_texture_create(struct pipe_screen *screen,
buf_usage = I915_NEW_TEXTURE;
tex->buffer = iws->buffer_create_tiled(iws, &tex->stride, tex->total_nblocksy,
- &tex->tiling, buf_usage);
+ &tex->tiling, buf_usage);
if (!tex->buffer)
goto fail;
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index c86baa58b28..e743f6031eb 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -109,17 +109,17 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_ANISOTROPIC_FILTER:
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
case PIPE_CAP_NPOT_TEXTURES:
+ case PIPE_CAP_POINT_SPRITE:
case PIPE_CAP_PRIMITIVE_RESTART: /* draw module */
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
case PIPE_CAP_TEXTURE_SHADOW_MAP:
case PIPE_CAP_TWO_SIDED_STENCIL:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
return 1;
/* Features that should be supported (boolean caps). */
/* XXX: Just test the code */
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
- /* XXX: No code but hw supports it */
- case PIPE_CAP_POINT_SPRITE:
/* Also lie about these when asked to (needed for GLSL / GL 2.0) */
return is->debug.lie ? 1 : 0;
@@ -129,7 +129,6 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
case PIPE_CAP_TGSI_INSTANCEID:
- case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
case PIPE_CAP_SHADER_STENCIL_EXPORT:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_TEXTURE_SWIZZLE:
@@ -254,7 +253,7 @@ i915_get_paramf(struct pipe_screen *screen, enum pipe_cap cap)
}
}
-static boolean
+boolean
i915_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_texture_target target,
@@ -264,7 +263,10 @@ i915_is_format_supported(struct pipe_screen *screen,
static const enum pipe_format tex_supported[] = {
PIPE_FORMAT_B8G8R8A8_UNORM,
PIPE_FORMAT_B8G8R8X8_UNORM,
+ PIPE_FORMAT_R8G8B8A8_UNORM,
+ PIPE_FORMAT_R8G8B8X8_UNORM,
PIPE_FORMAT_B5G6R5_UNORM,
+ PIPE_FORMAT_B10G10R10A2_UNORM,
PIPE_FORMAT_L8_UNORM,
PIPE_FORMAT_A8_UNORM,
PIPE_FORMAT_I8_UNORM,
@@ -283,7 +285,12 @@ i915_is_format_supported(struct pipe_screen *screen,
};
static const enum pipe_format render_supported[] = {
PIPE_FORMAT_B8G8R8A8_UNORM,
+ PIPE_FORMAT_R8G8B8A8_UNORM,
PIPE_FORMAT_B5G6R5_UNORM,
+ PIPE_FORMAT_B10G10R10A2_UNORM,
+ PIPE_FORMAT_L8_UNORM,
+ PIPE_FORMAT_A8_UNORM,
+ PIPE_FORMAT_I8_UNORM,
PIPE_FORMAT_NONE /* list terminator */
};
static const enum pipe_format depth_supported[] = {
diff --git a/src/gallium/drivers/i915/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h
index cfc585b5350..9f2004eb942 100644
--- a/src/gallium/drivers/i915/i915_screen.h
+++ b/src/gallium/drivers/i915/i915_screen.h
@@ -65,5 +65,11 @@ i915_screen(struct pipe_screen *pscreen)
return (struct i915_screen *) pscreen;
}
+boolean
+i915_is_format_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned tex_usage);
#endif /* I915_SCREEN_H */
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 1b57c5776f2..f412626955d 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -146,6 +146,7 @@ i915_create_blend_state(struct pipe_context *pipe,
if (blend->dither)
cso_data->LIS5 |= S5_COLOR_DITHER_ENABLE;
+ /* XXX here take the target fixup into account */
if ((blend->rt[0].colormask & PIPE_MASK_R) == 0)
cso_data->LIS5 |= S5_WRITEDISABLE_RED;
@@ -246,7 +247,7 @@ i915_create_sampler_state(struct pipe_context *pipe,
if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
{
cso->state[0] |= (SS2_SHADOW_ENABLE |
- i915_translate_compare_func(sampler->compare_func));
+ i915_translate_shadow_compare_func(sampler->compare_func));
minFilt = FILTER_4X4_FLAT;
magFilt = FILTER_4X4_FLAT;
@@ -466,6 +467,7 @@ i915_create_fs_state(struct pipe_context *pipe,
if (!ifs)
return NULL;
+ ifs->draw_data = draw_create_fragment_shader(i915->draw, templ);
ifs->state.tokens = tgsi_dup_tokens(templ->tokens);
tgsi_scan_shader(templ->tokens, &ifs->info);
@@ -495,6 +497,8 @@ i915_bind_fs_state(struct pipe_context *pipe, void *shader)
i915->fs = (struct i915_fragment_shader*) shader;
+ draw_bind_fragment_shader(i915->draw, (i915->fs ? i915->fs->draw_data : NULL));
+
i915->dirty |= I915_NEW_FS;
}
@@ -503,12 +507,14 @@ void i915_delete_fs_state(struct pipe_context *pipe, void *shader)
{
struct i915_fragment_shader *ifs = (struct i915_fragment_shader *) shader;
- if (ifs->program)
+ if (ifs->program) {
FREE(ifs->program);
+ ifs->program = NULL;
+ FREE((struct tgsi_token *)ifs->state.tokens);
+ ifs->state.tokens = NULL;
+ }
ifs->program_len = 0;
- FREE((struct tgsi_token *)ifs->state.tokens);
-
FREE(ifs);
}
diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c
index bf6b30a4530..e01f16e715c 100644
--- a/src/gallium/drivers/i915/i915_state_derived.c
+++ b/src/gallium/drivers/i915/i915_state_derived.c
@@ -33,9 +33,10 @@
#include "i915_context.h"
#include "i915_state.h"
#include "i915_debug.h"
+#include "i915_fpc.h"
#include "i915_reg.h"
-static uint find_mapping(struct i915_fragment_shader* fs, int unit)
+static uint find_mapping(const struct i915_fragment_shader* fs, int unit)
{
int i;
for (i = 0; i < I915_TEX_UNITS ; i++)
@@ -58,12 +59,12 @@ static void calculate_vertex_layout(struct i915_context *i915)
const struct i915_fragment_shader *fs = i915->fs;
const enum interp_mode colorInterp = i915->rasterizer->color_interp;
struct vertex_info vinfo;
- boolean texCoords[I915_TEX_UNITS], colors[2], fog, needW;
+ boolean texCoords[I915_TEX_UNITS], colors[2], fog, needW, face;
uint i;
int src;
memset(texCoords, 0, sizeof(texCoords));
- colors[0] = colors[1] = fog = needW = FALSE;
+ colors[0] = colors[1] = fog = needW = face = FALSE;
memset(&vinfo, 0, sizeof(vinfo));
/* Determine which fragment program inputs are needed. Setup HW vertex
@@ -72,6 +73,10 @@ static void calculate_vertex_layout(struct i915_context *i915)
for (i = 0; i < fs->info.num_inputs; i++) {
switch (fs->info.input_semantic_name[i]) {
case TGSI_SEMANTIC_POSITION:
+ {
+ uint unit = I915_SEMANTIC_POS;
+ texCoords[find_mapping(fs, unit)] = TRUE;
+ }
break;
case TGSI_SEMANTIC_COLOR:
assert(fs->info.input_semantic_index[i] < 2);
@@ -80,7 +85,6 @@ static void calculate_vertex_layout(struct i915_context *i915)
case TGSI_SEMANTIC_GENERIC:
{
/* texcoords/varyings/other generic */
- /* XXX handle back/front face and point size */
uint unit = fs->info.input_semantic_index[i];
texCoords[find_mapping(fs, unit)] = TRUE;
@@ -90,7 +94,11 @@ static void calculate_vertex_layout(struct i915_context *i915)
case TGSI_SEMANTIC_FOG:
fog = TRUE;
break;
+ case TGSI_SEMANTIC_FACE:
+ face = TRUE;
+ break;
default:
+ debug_printf("Unknown input type %d\n", fs->info.input_semantic_name[i]);
assert(0);
}
}
@@ -147,6 +155,20 @@ static void calculate_vertex_layout(struct i915_context *i915)
vinfo.hwfmt[1] |= hwtc << (i * 4);
}
+ /* front/back face */
+ if (face) {
+ uint slot = find_mapping(fs, I915_SEMANTIC_FACE);
+ debug_printf("Front/back face is broken\n");
+ /* XXX Because of limitations in the draw module, currently src will be 0
+ * for SEMANTIC_FACE, so this aliases to POS. We need to fix in the draw
+ * module by adding an extra shader output.
+ */
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FACE, 0);
+ draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_CONSTANT, src);
+ vinfo.hwfmt[1] &= ~(TEXCOORDFMT_NOT_PRESENT << (slot * 4));
+ vinfo.hwfmt[1] |= TEXCOORDFMT_1D << (slot * 4);
+ }
+
draw_compute_vertex_size(&vinfo);
if (memcmp(&i915->current.vertex_info, &vinfo, sizeof(vinfo))) {
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index 0155cd83510..39fb13aec7e 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -34,7 +34,9 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
+#include "pipe/p_format.h"
+#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -128,7 +130,7 @@ validate_immediate(struct i915_context *i915, unsigned *batch_space)
static void
emit_immediate(struct i915_context *i915)
{
- /* remove unwatned bits and S7 */
+ /* remove unwanted bits and S7 */
unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
@@ -341,6 +343,59 @@ emit_constants(struct i915_context *i915)
}
}
+static const struct
+{
+ enum pipe_format format;
+ uint hw_shift_R;
+ uint hw_shift_G;
+ uint hw_shift_B;
+ uint hw_shift_A;
+} fixup_formats[] = {
+ { PIPE_FORMAT_R8G8B8A8_UNORM, 20, 24, 28, 16 /* BGRA */},
+ { PIPE_FORMAT_L8_UNORM, 28, 28, 28, 16 /* RRRA */},
+ { PIPE_FORMAT_I8_UNORM, 28, 28, 28, 16 /* RRRA */},
+ { PIPE_FORMAT_A8_UNORM, 16, 16, 16, 16 /* AAAA */},
+ { PIPE_FORMAT_NONE, 0, 0, 0, 0},
+};
+
+static boolean need_fixup(struct pipe_surface* p)
+{
+ enum pipe_format f;
+
+ /* if we don't have a surface bound yet, we don't need to fixup the shader */
+ if (!p)
+ return FALSE;
+
+ f = p->format;
+ for(int i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++)
+ if (fixup_formats[i].format == f)
+ return TRUE;
+
+ return FALSE;
+}
+
+static uint fixup_swizzle(enum pipe_format f, uint v)
+{
+ int i;
+
+ for(i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++)
+ if (fixup_formats[i].format == f)
+ break;
+
+ if (fixup_formats[i].format == PIPE_FORMAT_NONE)
+ return v;
+
+ uint rgba = v & 0xFFFF0000;
+
+ v &= 0xFFFF;
+ v |= ((rgba >> fixup_formats[i].hw_shift_R) & 0xF) << 28;
+ v |= ((rgba >> fixup_formats[i].hw_shift_G) & 0xF) << 24;
+ v |= ((rgba >> fixup_formats[i].hw_shift_B) & 0xF) << 20;
+ v |= ((rgba >> fixup_formats[i].hw_shift_A) & 0xF) << 16;
+
+ return v;
+}
+
static void
validate_program(struct i915_context *i915, unsigned *batch_space)
{
@@ -350,12 +405,39 @@ validate_program(struct i915_context *i915, unsigned *batch_space)
static void
emit_program(struct i915_context *i915)
{
- uint i;
- /* we should always have, at least, a pass-through program */
- assert(i915->fs->program_len > 0);
- for (i = 0; i < i915->fs->program_len; i++) {
- OUT_BATCH(i915->fs->program[i]);
+ struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
+ boolean need_format_fixup = need_fixup(cbuf_surface);
+ int i;
+ int fixup_offset = -1;
+
+ /* we should always have, at least, a pass-through program */
+ assert(i915->fs->program_len > 0);
+
+ if (need_format_fixup) {
+ /* Find where we emit the output color */
+ for (i = i915->fs->program_len - 3; i>0; i-=3) {
+ uint instr = i915->fs->program[i];
+ if ((instr & (REG_NR_MASK << A0_DEST_TYPE_SHIFT)) ==
+ (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) ) {
+ /* Found it! */
+ fixup_offset = i + 1;
+ break;
+ }
+ }
+ if (fixup_offset == -1) {
+ need_format_fixup = FALSE;
+ debug_printf("couldn't find fixup offset\n");
}
+ }
+
+ /* emit the program to the hw */
+ for (i = 0; i < i915->fs->program_len; i++) {
+ if (need_format_fixup && (i == fixup_offset) ) {
+ uint v = fixup_swizzle(cbuf_surface->format, i915->fs->program[i]);
+ OUT_BATCH(v);
+ } else
+ OUT_BATCH(i915->fs->program[i]);
+ }
}
static void
diff --git a/src/gallium/drivers/i915/i915_state_inlines.h b/src/gallium/drivers/i915/i915_state_inlines.h
index b589117fbfe..aa992f75c51 100644
--- a/src/gallium/drivers/i915/i915_state_inlines.h
+++ b/src/gallium/drivers/i915/i915_state_inlines.h
@@ -60,6 +60,31 @@ i915_translate_compare_func(unsigned func)
}
static INLINE unsigned
+i915_translate_shadow_compare_func(unsigned func)
+{
+ switch (func) {
+ case PIPE_FUNC_NEVER:
+ return COMPAREFUNC_ALWAYS;
+ case PIPE_FUNC_LESS:
+ return COMPAREFUNC_LEQUAL;
+ case PIPE_FUNC_LEQUAL:
+ return COMPAREFUNC_LESS;
+ case PIPE_FUNC_GREATER:
+ return COMPAREFUNC_GEQUAL;
+ case PIPE_FUNC_GEQUAL:
+ return COMPAREFUNC_GREATER;
+ case PIPE_FUNC_NOTEQUAL:
+ return COMPAREFUNC_EQUAL;
+ case PIPE_FUNC_EQUAL:
+ return COMPAREFUNC_NOTEQUAL;
+ case PIPE_FUNC_ALWAYS:
+ return COMPAREFUNC_NEVER;
+ default:
+ return COMPAREFUNC_NEVER;
+ }
+}
+
+static INLINE unsigned
i915_translate_stencil_op(unsigned op)
{
switch (op) {
diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c
index be70e7a92c9..0103f7c3530 100644
--- a/src/gallium/drivers/i915/i915_state_sampler.c
+++ b/src/gallium/drivers/i915/i915_state_sampler.c
@@ -62,6 +62,7 @@ static void update_map(struct i915_context *i915,
uint unit,
const struct i915_texture *tex,
const struct i915_sampler_state *sampler,
+ const struct pipe_sampler_view* view,
uint state[2]);
@@ -161,9 +162,10 @@ static void update_samplers(struct i915_context *i915)
i915->current.sampler[unit]); /* the result */
update_map(i915,
unit,
- texture, /* texture */
- i915->sampler[unit], /* sampler state */
- i915->current.texbuffer[unit]); /* the result */
+ texture, /* texture */
+ i915->sampler[unit], /* sampler state */
+ i915->fragment_sampler_views[unit], /* sampler view */
+ i915->current.texbuffer[unit]); /* the result */
i915->current.sampler_enable_nr++;
i915->current.sampler_enable_flags |= (1 << unit);
@@ -180,13 +182,21 @@ struct i915_tracked_state i915_hw_samplers = {
};
-
/***********************************************************************
* Sampler views
*/
-static uint translate_texture_format(enum pipe_format pipeFormat)
+static uint translate_texture_format(enum pipe_format pipeFormat,
+ const struct pipe_sampler_view* view)
{
+ if ( (view->swizzle_r != PIPE_SWIZZLE_RED ||
+ view->swizzle_g != PIPE_SWIZZLE_GREEN ||
+ view->swizzle_b != PIPE_SWIZZLE_BLUE ||
+ view->swizzle_a != PIPE_SWIZZLE_ALPHA ) &&
+ pipeFormat != PIPE_FORMAT_Z24_UNORM_S8_USCALED &&
+ pipeFormat != PIPE_FORMAT_Z24X8_UNORM )
+ debug_printf("i915: unsupported texture swizzle for format %d\n", pipeFormat);
+
switch (pipeFormat) {
case PIPE_FORMAT_L8_UNORM:
return MAPSURF_8BIT | MT_8BIT_L8;
@@ -202,16 +212,16 @@ static uint translate_texture_format(enum pipe_format pipeFormat)
return MAPSURF_16BIT | MT_16BIT_ARGB1555;
case PIPE_FORMAT_B4G4R4A4_UNORM:
return MAPSURF_16BIT | MT_16BIT_ARGB4444;
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ return MAPSURF_32BIT | MT_32BIT_ARGB2101010;
case PIPE_FORMAT_B8G8R8A8_UNORM:
return MAPSURF_32BIT | MT_32BIT_ARGB8888;
case PIPE_FORMAT_B8G8R8X8_UNORM:
return MAPSURF_32BIT | MT_32BIT_XRGB8888;
case PIPE_FORMAT_R8G8B8A8_UNORM:
return MAPSURF_32BIT | MT_32BIT_ABGR8888;
-#if 0
case PIPE_FORMAT_R8G8B8X8_UNORM:
return MAPSURF_32BIT | MT_32BIT_XBGR8888;
-#endif
case PIPE_FORMAT_YUYV:
return (MAPSURF_422 | MT_422_YCRCB_NORMAL);
case PIPE_FORMAT_UYVY:
@@ -232,7 +242,25 @@ static uint translate_texture_format(enum pipe_format pipeFormat)
return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5);
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
case PIPE_FORMAT_Z24X8_UNORM:
- return (MAPSURF_32BIT | MT_32BIT_xI824);
+ {
+ if ( view->swizzle_r == PIPE_SWIZZLE_RED &&
+ view->swizzle_g == PIPE_SWIZZLE_RED &&
+ view->swizzle_b == PIPE_SWIZZLE_RED &&
+ view->swizzle_a == PIPE_SWIZZLE_ONE)
+ return (MAPSURF_32BIT | MT_32BIT_xA824);
+ if ( view->swizzle_r == PIPE_SWIZZLE_RED &&
+ view->swizzle_g == PIPE_SWIZZLE_RED &&
+ view->swizzle_b == PIPE_SWIZZLE_RED &&
+ view->swizzle_a == PIPE_SWIZZLE_RED)
+ return (MAPSURF_32BIT | MT_32BIT_xI824);
+ if ( view->swizzle_r == PIPE_SWIZZLE_ZERO &&
+ view->swizzle_g == PIPE_SWIZZLE_ZERO &&
+ view->swizzle_b == PIPE_SWIZZLE_ZERO &&
+ view->swizzle_a == PIPE_SWIZZLE_RED)
+ return (MAPSURF_32BIT | MT_32BIT_xL824);
+ debug_printf("i915: unsupported depth swizzle\n");
+ return (MAPSURF_32BIT | MT_32BIT_xL824);
+ }
default:
debug_printf("i915: translate_texture_format() bad image format %x\n",
pipeFormat);
@@ -262,6 +290,7 @@ static void update_map(struct i915_context *i915,
uint unit,
const struct i915_texture *tex,
const struct i915_sampler_state *sampler,
+ const struct pipe_sampler_view* view,
uint state[2])
{
const struct pipe_resource *pt = &tex->b.b;
@@ -275,7 +304,7 @@ static void update_map(struct i915_context *i915,
assert(height);
assert(depth);
- format = translate_texture_format(pt->format);
+ format = translate_texture_format(pt->format, view);
pitch = tex->stride;
assert(format);
@@ -318,8 +347,9 @@ static void update_maps(struct i915_context *i915)
update_map(i915,
unit,
- texture, /* texture */
- i915->sampler[unit], /* sampler state */
+ texture, /* texture */
+ i915->sampler[unit], /* sampler state */
+ i915->fragment_sampler_views[unit], /* sampler view */
i915->current.texbuffer[unit]);
}
}
diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c
index 2865298318c..0e4000bc2ab 100644
--- a/src/gallium/drivers/i915/i915_state_static.c
+++ b/src/gallium/drivers/i915/i915_state_static.c
@@ -42,6 +42,18 @@ static unsigned translate_format(enum pipe_format format)
return COLOR_BUF_ARGB8888;
case PIPE_FORMAT_B5G6R5_UNORM:
return COLOR_BUF_RGB565;
+ case PIPE_FORMAT_B5G5R5A1_UNORM:
+ return COLOR_BUF_ARGB1555;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return COLOR_BUF_ARGB8888;
+ case PIPE_FORMAT_B4G4R4A4_UNORM:
+ return COLOR_BUF_ARGB4444;
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ return COLOR_BUF_ARGB2101010;
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ return COLOR_BUF_8BIT;
default:
assert(0);
return 0;
@@ -137,7 +149,8 @@ static void update_framebuffer(struct i915_context *i915)
i915->static_dirty |= I915_DST_RECT;
}
- i915->hardware_dirty |= I915_HW_STATIC;
+ /* we also send a new program to make sure the fixup for RGBA surfaces happens */
+ i915->hardware_dirty |= I915_HW_STATIC | I915_HW_PROGRAM;
/* flush the cache in case we sample from the old renderbuffers */
i915_set_flush_dirty(i915, I915_FLUSH_CACHE);
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index c10a8cbc12c..d6b20ceb5ce 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -79,7 +79,7 @@ llvmpipe = env.ConvenienceLibrary(
env.Alias('llvmpipe', llvmpipe)
-if env['platform'] != 'embedded':
+if not env['embedded']:
env = env.Clone()
env.Prepend(LIBS = [llvmpipe] + gallium)
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 036a6e0c379..4b2ae1436ea 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -423,7 +423,7 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
lp_jit_screen_init(screen);
screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0;
-#ifdef PIPE_OS_EMBEDDED
+#ifdef PIPE_SUBSYSTEM_EMBEDDED
screen->num_threads = 0;
#endif
screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads);
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
index 8df7b236fe0..f4324e69971 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -423,6 +423,70 @@ lp_tile_b8g8r8a8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src,
}
}
+static void
+lp_tile_b8g8r8x8_unorm_swizzle_4ub_sse2(uint8_t * restrict dst,
+ const uint8_t * restrict src, unsigned src_stride,
+ unsigned x0, unsigned y0)
+{
+ __m128i *dst128 = (__m128i *) dst;
+ unsigned x, y;
+
+ src += y0 * src_stride;
+ src += x0 * sizeof(uint32_t);
+
+ for (y = 0; y < TILE_SIZE; y += 4) {
+ const uint8_t *src_row = src;
+
+ for (x = 0; x < TILE_SIZE; x += 4) {
+ swz4((const __m128i *) (src_row + 0 * src_stride),
+ (const __m128i *) (src_row + 1 * src_stride),
+ (const __m128i *) (src_row + 2 * src_stride),
+ (const __m128i *) (src_row + 3 * src_stride),
+ dst128 + 2, /* b */
+ dst128 + 1, /* g */
+ dst128 + 0, /* r */
+ dst128 + 3); /* a */
+
+ dst128 += 4;
+ src_row += sizeof(__m128i);
+ }
+
+ src += 4 * src_stride;
+ }
+}
+
+static void
+lp_tile_b8g8r8x8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src,
+ uint8_t * restrict dst, unsigned dst_stride,
+ unsigned x0, unsigned y0)
+{
+ unsigned int x, y;
+ const __m128i *src128 = (const __m128i *) src;
+
+ dst += y0 * dst_stride;
+ dst += x0 * sizeof(uint32_t);
+
+ for (y = 0; y < TILE_SIZE; y += 4) {
+ const uint8_t *dst_row = dst;
+
+ for (x = 0; x < TILE_SIZE; x += 4) {
+ unswz4( &src128[2], /* b */
+ &src128[1], /* g */
+ &src128[0], /* r */
+ &src128[3], /* a */
+ (__m128i *) (dst_row + 0 * dst_stride),
+ (__m128i *) (dst_row + 1 * dst_stride),
+ (__m128i *) (dst_row + 2 * dst_stride),
+ (__m128i *) (dst_row + 3 * dst_stride));
+
+ src128 += 4;
+ dst_row += sizeof(__m128i);;
+ }
+
+ dst += 4 * dst_stride;
+ }
+}
+
#endif /* PIPE_ARCH_SSE */
'''
@@ -446,7 +510,7 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix):
if is_format_supported(format):
print ' case %s:' % format.name
func_name = 'lp_tile_%s_swizzle_%s' % (format.short_name(), dst_suffix)
- if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM':
+ if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM' or format.name == 'PIPE_FORMAT_B8G8R8X8_UNORM':
print '#ifdef PIPE_ARCH_SSE'
print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name)
print '#else'
@@ -484,7 +548,7 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix):
if is_format_supported(format):
print ' case %s:' % format.name
func_name = 'lp_tile_%s_unswizzle_%s' % (format.short_name(), src_suffix)
- if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM':
+ if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM' or format.name == 'PIPE_FORMAT_B8G8R8X8_UNORM':
print '#ifdef PIPE_ARCH_SSE'
print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name)
print '#else'
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index 401155bba6e..223e7682ccd 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -81,20 +81,6 @@ nouveau_screen_bo_new(struct pipe_screen *pscreen, unsigned alignment,
return bo;
}
-struct nouveau_bo *
-nouveau_screen_bo_user(struct pipe_screen *pscreen, void *ptr, unsigned bytes)
-{
- struct nouveau_device *dev = nouveau_screen(pscreen)->device;
- struct nouveau_bo *bo = NULL;
- int ret;
-
- ret = nouveau_bo_user(dev, ptr, bytes, &bo);
- if (ret)
- return NULL;
-
- return bo;
-}
-
void *
nouveau_screen_bo_map(struct pipe_screen *pscreen,
struct nouveau_bo *bo,
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index 186ada39677..d910809a0ec 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -47,8 +47,6 @@ nouveau_screen(struct pipe_screen *pscreen)
struct nouveau_bo *
nouveau_screen_bo_new(struct pipe_screen *pscreen, unsigned alignment,
unsigned usage, unsigned bind, unsigned size);
-struct nouveau_bo *
-nouveau_screen_bo_user(struct pipe_screen *pscreen, void *ptr, unsigned bytes);
void *
nouveau_screen_bo_map(struct pipe_screen *pscreen,
struct nouveau_bo *pb,
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index 632ca4daf74..ceb83f6e684 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -168,6 +168,7 @@ nv50_bufctx_add_resident(struct nv50_context *nv50, int ctx,
if (!resource->bo)
return;
+ nv50->residents_size += sizeof(struct resident);
/* We don't need to reference the resource here, it will be referenced
* in the context/state, and bufctx will be reset when state changes.
@@ -189,6 +190,7 @@ nv50_bufctx_del_resident(struct nv50_context *nv50, int ctx,
top = util_dynarray_pop_ptr(&nv50->residents[ctx], struct resident);
if (rsd != top)
*rsd = *top;
+ nv50->residents_size -= sizeof(struct resident);
break;
}
}
@@ -201,11 +203,15 @@ nv50_bufctx_emit_relocs(struct nv50_context *nv50)
struct util_dynarray *array;
unsigned ctx, i, n;
+ n = nv50->residents_size / sizeof(struct resident);
+ n += NV50_SCREEN_RESIDENT_BO_COUNT;
+
+ MARK_RING(nv50->screen->base.channel, n, n);
+
for (ctx = 0; ctx < NV50_BUFCTX_COUNT; ++ctx) {
array = &nv50->residents[ctx];
n = array->size / sizeof(struct resident);
- MARK_RING(nv50->screen->base.channel, n, n);
for (i = 0; i < n; ++i) {
rsd = util_dynarray_element(array, struct resident, i);
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 3f031994f0a..b4af24f6bce 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -64,6 +64,7 @@ struct nv50_context {
struct nv50_screen *screen;
struct util_dynarray residents[NV50_BUFCTX_COUNT];
+ unsigned residents_size;
uint32_t dirty;
@@ -156,6 +157,7 @@ void nv50_bufctx_del_resident(struct nv50_context *, int ctx,
static INLINE void
nv50_bufctx_reset(struct nv50_context *nv50, int ctx)
{
+ nv50->residents_size -= nv50->residents[ctx].size;
util_dynarray_resize(&nv50->residents[ctx], 0);
}
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index aea434b8679..64ad209a728 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -19,6 +19,8 @@ struct nv50_context;
#define NV50_SCRATCH_SIZE (2 << 20)
#define NV50_SCRATCH_NR_BUFFERS 2
+#define NV50_SCREEN_RESIDENT_BO_COUNT 5
+
struct nv50_screen {
struct nouveau_screen base;
struct nouveau_winsys *nvws;
diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c
index 5d3f52c38c1..e5b10c37bef 100644
--- a/src/gallium/drivers/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nv50/nv50_shader_state.c
@@ -215,10 +215,12 @@ void
nv50_gmtyprog_validate(struct nv50_context *nv50)
{
struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nv50_program *gp = nv50->vertprog;
+ struct nv50_program *gp = nv50->gmtyprog;
+ if (!gp) /* GP_ENABLE is updated in linkage validation */
+ return;
if (!nv50_program_validate(nv50, gp))
- return;
+ return;
BEGIN_RING(chan, RING_3D(GP_REG_ALLOC_TEMP), 1);
OUT_RING (chan, gp->max_gpr);
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index abdb9ce2f93..bb08941c243 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -404,9 +404,6 @@ nv50_draw_arrays(struct nv50_context *nv50,
struct nouveau_channel *chan = nv50->screen->base.channel;
unsigned prim;
- chan->flush_notify = nv50_draw_vbo_flush_notify;
- chan->user_private = nv50;
-
prim = nv50_prim_gl(mode);
while (instance_count--) {
@@ -420,8 +417,6 @@ nv50_draw_arrays(struct nv50_context *nv50,
prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
}
-
- chan->flush_notify = nv50_default_flush_notify;
}
static void
@@ -523,9 +518,6 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
unsigned prim;
const unsigned index_size = nv50->idxbuf.index_size;
- chan->flush_notify = nv50_draw_vbo_flush_notify;
- chan->user_private = nv50;
-
prim = nv50_prim_gl(mode);
if (index_bias != nv50->state.index_bias) {
@@ -631,8 +623,6 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
}
}
-
- chan->flush_notify = nv50_default_flush_notify;
}
void
@@ -659,8 +649,12 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nv50_state_validate(nv50);
+ chan->flush_notify = nv50_draw_vbo_flush_notify;
+ chan->user_private = nv50;
+
if (nv50->vbo_fifo) {
nv50_push_vbo(nv50, info);
+ chan->flush_notify = nv50_default_flush_notify;
return;
}
@@ -712,6 +706,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
info->mode, info->start, info->count,
info->instance_count, info->index_bias);
}
+ chan->flush_notify = nv50_default_flush_notify;
nv50_release_user_vbufs(nv50);
}
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
index 2f2a3da7c44..2679b7f86aa 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -169,6 +169,7 @@ nvc0_bufctx_add_resident(struct nvc0_context *nvc0, int ctx,
if (!resource->bo)
return;
+ nvc0->residents_size += sizeof(struct resident);
/* We don't need to reference the resource here, it will be referenced
* in the context/state, and bufctx will be reset when state changes.
@@ -190,6 +191,7 @@ nvc0_bufctx_del_resident(struct nvc0_context *nvc0, int ctx,
top = util_dynarray_pop_ptr(&nvc0->residents[ctx], struct resident);
if (rsd != top)
*rsd = *top;
+ nvc0->residents_size -= sizeof(struct resident);
break;
}
}
@@ -202,11 +204,15 @@ nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0)
struct util_dynarray *array;
unsigned ctx, i, n;
+ n = nvc0->residents_size / sizeof(struct resident);
+ n += NVC0_SCREEN_RESIDENT_BO_COUNT;
+
+ MARK_RING(nvc0->screen->base.channel, n, n);
+
for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) {
array = &nvc0->residents[ctx];
n = array->size / sizeof(struct resident);
- MARK_RING(nvc0->screen->base.channel, n, n);
for (i = 0; i < n; ++i) {
rsd = util_dynarray_element(array, struct resident, i);
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
index f97141dd46e..b05cc337d5d 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -62,6 +62,7 @@ struct nvc0_context {
struct nvc0_screen *screen;
struct util_dynarray residents[NVC0_BUFCTX_COUNT];
+ unsigned residents_size;
uint32_t dirty;
@@ -163,6 +164,7 @@ void nvc0_bufctx_del_resident(struct nvc0_context *, int ctx,
static INLINE void
nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx)
{
+ nvc0->residents_size -= nvc0->residents[ctx].size;
util_dynarray_resize(&nvc0->residents[ctx], 0);
}
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
index 94bf0cf3481..015807e2f5d 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -16,6 +16,8 @@ struct nvc0_context;
#define NVC0_SCRATCH_SIZE (2 << 20)
#define NVC0_SCRATCH_NR_BUFFERS 2
+#define NVC0_SCREEN_RESIDENT_BO_COUNT 5
+
struct nvc0_screen {
struct nouveau_screen base;
struct nouveau_winsys *nvws;
diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c
index 6bbcf2447ec..41079104b39 100644
--- a/src/gallium/drivers/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nvc0/nvc0_vbo.c
@@ -382,9 +382,6 @@ nvc0_draw_arrays(struct nvc0_context *nvc0,
struct nouveau_channel *chan = nvc0->screen->base.channel;
unsigned prim;
- chan->flush_notify = nvc0_draw_vbo_flush_notify;
- chan->user_private = nvc0;
-
prim = nvc0_prim_gl(mode);
while (instance_count--) {
@@ -397,8 +394,6 @@ nvc0_draw_arrays(struct nvc0_context *nvc0,
prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
}
-
- chan->flush_notify = nvc0_default_flush_notify;
}
static void
@@ -500,9 +495,6 @@ nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
unsigned prim;
const unsigned index_size = nvc0->idxbuf.index_size;
- chan->flush_notify = nvc0_draw_vbo_flush_notify;
- chan->user_private = nvc0;
-
prim = nvc0_prim_gl(mode);
if (index_bias != nvc0->state.index_bias) {
@@ -568,8 +560,6 @@ nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
}
}
-
- chan->flush_notify = nvc0_default_flush_notify;
}
void
@@ -596,8 +586,12 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nvc0_state_validate(nvc0);
+ chan->flush_notify = nvc0_draw_vbo_flush_notify;
+ chan->user_private = nvc0;
+
if (nvc0->vbo_fifo) {
nvc0_push_vbo(nvc0, info);
+ chan->flush_notify = nvc0_default_flush_notify;
return;
}
@@ -648,6 +642,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
info->mode, info->start, info->count,
info->instance_count, info->index_bias);
}
+ chan->flush_notify = nvc0_default_flush_notify;
nvc0_release_user_vbufs(nvc0);
}
diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c
index 2b1510264a1..98603bedde1 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_context.c
@@ -24,9 +24,21 @@ nvfx_flush(struct pipe_context *pipe,
OUT_RING(chan, 1);
}*/
- FIRE_RING(chan);
- if (fence)
+ if (fence) {
+ /* horrific hack to make glFinish() work in the absence of
+ * having proper fences in nvfx. a pending rewrite will
+ * fix this properly, but may be a while off.
+ */
+ MARK_RING(chan, 1, 1);
+ OUT_RELOC(chan, screen->fence, 0, NOUVEAU_BO_WR |
+ NOUVEAU_BO_DUMMY, 0, 0);
+ FIRE_RING(chan);
+ nouveau_bo_map(screen->fence, NOUVEAU_BO_RDWR);
+ nouveau_bo_unmap(screen->fence);
*fence = NULL;
+ } else {
+ FIRE_RING(chan);
+ }
}
static void
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
index 78212029534..0140470d576 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -305,6 +305,7 @@ nvfx_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->eng3d);
nvfx_screen_surface_takedown(pscreen);
+ nouveau_bo_ref(NULL, &screen->fence);
nouveau_screen_fini(&screen->base);
@@ -470,6 +471,12 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
pscreen->context_create = nvfx_create;
pscreen->video_context_create = nvfx_video_create;
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 4096, &screen->fence);
+ if (ret) {
+ nvfx_screen_destroy(pscreen);
+ return NULL;
+ }
+
switch (dev->chipset & 0xf0) {
case 0x30:
if (NV30_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h
index b1f07187c78..02e7c5d1cad 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.h
+++ b/src/gallium/drivers/nvfx/nvfx_screen.h
@@ -11,6 +11,7 @@ struct nvfx_screen {
struct nouveau_screen base;
struct nouveau_winsys *nvws;
+ struct nouveau_bo *fence;
struct nvfx_context *cur_ctx;
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index b24e7faa644..b31141a518e 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -46,30 +46,26 @@
static uint32_t r300_translate_primitive(unsigned prim)
{
- switch (prim) {
- case PIPE_PRIM_POINTS:
- return R300_VAP_VF_CNTL__PRIM_POINTS;
- case PIPE_PRIM_LINES:
- return R300_VAP_VF_CNTL__PRIM_LINES;
- case PIPE_PRIM_LINE_LOOP:
- return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
- case PIPE_PRIM_LINE_STRIP:
- return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
- case PIPE_PRIM_TRIANGLES:
- return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
- case PIPE_PRIM_TRIANGLE_STRIP:
- return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
- case PIPE_PRIM_TRIANGLE_FAN:
- return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
- case PIPE_PRIM_QUADS:
- return R300_VAP_VF_CNTL__PRIM_QUADS;
- case PIPE_PRIM_QUAD_STRIP:
- return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
- case PIPE_PRIM_POLYGON:
- return R300_VAP_VF_CNTL__PRIM_POLYGON;
- default:
- return 0;
- }
+ static const int prim_conv[] = {
+ R300_VAP_VF_CNTL__PRIM_POINTS,
+ R300_VAP_VF_CNTL__PRIM_LINES,
+ R300_VAP_VF_CNTL__PRIM_LINE_LOOP,
+ R300_VAP_VF_CNTL__PRIM_LINE_STRIP,
+ R300_VAP_VF_CNTL__PRIM_TRIANGLES,
+ R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP,
+ R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN,
+ R300_VAP_VF_CNTL__PRIM_QUADS,
+ R300_VAP_VF_CNTL__PRIM_QUAD_STRIP,
+ R300_VAP_VF_CNTL__PRIM_POLYGON,
+ -1,
+ -1,
+ -1,
+ -1
+ };
+ unsigned hwprim = prim_conv[prim];
+
+ assert(hwprim != -1);
+ return hwprim;
}
static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
@@ -179,8 +175,8 @@ static void r300_split_index_bias(struct r300_context *r300, int index_bias,
enum r300_prepare_flags {
PREP_EMIT_STATES = (1 << 0), /* call emit_dirty_state and friends? */
PREP_VALIDATE_VBOS = (1 << 1), /* validate VBOs? */
- PREP_EMIT_AOS = (1 << 2), /* call emit_vertex_arrays? */
- PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_vertex_arrays_swtcl? */
+ PREP_EMIT_VARRAYS = (1 << 2), /* call emit_vertex_arrays? */
+ PREP_EMIT_VARRAYS_SWTCL = (1 << 3), /* call emit_vertex_arrays_swtcl? */
PREP_INDEXED = (1 << 4) /* is this draw_elements? */
};
@@ -197,23 +193,22 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300,
unsigned cs_dwords)
{
boolean flushed = FALSE;
- boolean first_draw = flags & PREP_EMIT_STATES;
- boolean emit_vertex_arrays = flags & PREP_EMIT_AOS;
- boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL;
+ boolean emit_states = flags & PREP_EMIT_STATES;
+ boolean emit_vertex_arrays = flags & PREP_EMIT_VARRAYS;
+ boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_VARRAYS_SWTCL;
/* Add dirty state, index offset, and AOS. */
- if (first_draw) {
+ if (emit_states)
cs_dwords += r300_get_num_dirty_dwords(r300);
- if (r300->screen->caps.is_r500)
- cs_dwords += 2; /* emit_index_offset */
+ if (r300->screen->caps.is_r500)
+ cs_dwords += 2; /* emit_index_offset */
- if (emit_vertex_arrays)
- cs_dwords += 55; /* emit_vertex_arrays */
+ if (emit_vertex_arrays)
+ cs_dwords += 55; /* emit_vertex_arrays */
- if (emit_vertex_arrays_swtcl)
- cs_dwords += 7; /* emit_vertex_arrays_swtcl */
- }
+ if (emit_vertex_arrays_swtcl)
+ cs_dwords += 7; /* emit_vertex_arrays_swtcl */
cs_dwords += r300_get_num_cs_end_dwords(r300);
@@ -242,46 +237,48 @@ static boolean r300_emit_states(struct r300_context *r300,
int buffer_offset,
int index_bias, int instance_id)
{
- boolean first_draw = flags & PREP_EMIT_STATES;
- boolean emit_vertex_arrays = flags & PREP_EMIT_AOS;
- boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL;
+ boolean emit_states = flags & PREP_EMIT_STATES;
+ boolean emit_vertex_arrays = flags & PREP_EMIT_VARRAYS;
+ boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_VARRAYS_SWTCL;
boolean indexed = flags & PREP_INDEXED;
boolean validate_vbos = flags & PREP_VALIDATE_VBOS;
/* Validate buffers and emit dirty state if needed. */
- if (first_draw) {
+ if (emit_states || (emit_vertex_arrays && validate_vbos)) {
if (!r300_emit_buffer_validate(r300, validate_vbos,
index_buffer)) {
fprintf(stderr, "r300: CS space validation failed. "
"(not enough memory?) Skipping rendering.\n");
return FALSE;
}
+ }
+ if (emit_states)
r300_emit_dirty_state(r300);
- if (r300->screen->caps.is_r500) {
- if (r300->screen->caps.has_tcl)
- r500_emit_index_bias(r300, index_bias);
- else
- r500_emit_index_bias(r300, 0);
- }
- if (emit_vertex_arrays &&
- (r300->vertex_arrays_dirty ||
- r300->vertex_arrays_indexed != indexed ||
- r300->vertex_arrays_offset != buffer_offset ||
- r300->vertex_arrays_instance_id != instance_id)) {
- r300_emit_vertex_arrays(r300, buffer_offset, indexed, instance_id);
-
- r300->vertex_arrays_dirty = FALSE;
- r300->vertex_arrays_indexed = indexed;
- r300->vertex_arrays_offset = buffer_offset;
- r300->vertex_arrays_instance_id = instance_id;
- }
+ if (r300->screen->caps.is_r500) {
+ if (r300->screen->caps.has_tcl)
+ r500_emit_index_bias(r300, index_bias);
+ else
+ r500_emit_index_bias(r300, 0);
+ }
- if (emit_vertex_arrays_swtcl)
- r300_emit_vertex_arrays_swtcl(r300, indexed);
+ if (emit_vertex_arrays &&
+ (r300->vertex_arrays_dirty ||
+ r300->vertex_arrays_indexed != indexed ||
+ r300->vertex_arrays_offset != buffer_offset ||
+ r300->vertex_arrays_instance_id != instance_id)) {
+ r300_emit_vertex_arrays(r300, buffer_offset, indexed, instance_id);
+
+ r300->vertex_arrays_dirty = FALSE;
+ r300->vertex_arrays_indexed = indexed;
+ r300->vertex_arrays_offset = buffer_offset;
+ r300->vertex_arrays_instance_id = instance_id;
}
+ if (emit_vertex_arrays_swtcl)
+ r300_emit_vertex_arrays_swtcl(r300, indexed);
+
return TRUE;
}
@@ -544,7 +541,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300,
/* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. */
if (!r300_prepare_for_rendering(r300,
- PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS |
+ PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS |
PREP_INDEXED, NULL, 2+count_dwords, 0, info->index_bias, -1))
return;
@@ -666,7 +663,7 @@ static void r300_draw_elements(struct r300_context *r300,
/* 19 dwords for emit_draw_elements. Give up if the function fails. */
if (!r300_prepare_for_rendering(r300,
- PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS |
+ PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS |
PREP_INDEXED, indexBuffer, 19, buffer_offset, info->index_bias,
instance_id))
goto done;
@@ -677,10 +674,11 @@ static void r300_draw_elements(struct r300_context *r300,
indices3);
} else {
do {
- if (indexSize == 2 && (start & 1))
- short_count = MIN2(count, 65535);
- else
- short_count = MIN2(count, 65534);
+ /* The maximum must be divisible by 4 and 3,
+ * so that quad and triangle lists are split correctly.
+ *
+ * Strips, loops, and fans won't work. */
+ short_count = MIN2(count, 65532);
r300_emit_draw_elements(r300, indexBuffer, indexSize,
info->min_index, info->max_index,
@@ -692,7 +690,7 @@ static void r300_draw_elements(struct r300_context *r300,
/* 15 dwords for emit_draw_elements */
if (count) {
if (!r300_prepare_for_rendering(r300,
- PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED,
+ PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS | PREP_INDEXED,
indexBuffer, 19, buffer_offset, info->index_bias,
instance_id))
goto done;
@@ -718,7 +716,7 @@ static void r300_draw_arrays(struct r300_context *r300,
/* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
if (!r300_prepare_for_rendering(r300,
- PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS,
+ PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS,
NULL, 9, start, 0, instance_id))
return;
@@ -726,7 +724,11 @@ static void r300_draw_arrays(struct r300_context *r300,
r300_emit_draw_arrays(r300, info->mode, count);
} else {
do {
- short_count = MIN2(count, 65535);
+ /* The maximum must be divisible by 4 and 3,
+ * so that quad and triangle lists are split correctly.
+ *
+ * Strips, loops, and fans won't work. */
+ short_count = MIN2(count, 65532);
r300_emit_draw_arrays(r300, info->mode, short_count);
start += short_count;
@@ -735,7 +737,7 @@ static void r300_draw_arrays(struct r300_context *r300,
/* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
if (count) {
if (!r300_prepare_for_rendering(r300,
- PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9,
+ PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS, NULL, 9,
start, 0, instance_id))
return;
}
@@ -766,7 +768,6 @@ static void r300_draw_vbo(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
struct pipe_draw_info info = *dinfo;
- boolean buffers_updated, uploader_flushed;
info.indexed = info.indexed && r300->index_buffer.buffer;
@@ -778,9 +779,7 @@ static void r300_draw_vbo(struct pipe_context* pipe,
r300_update_derived_state(r300);
/* Start the vbuf manager and update buffers if needed. */
- u_vbuf_mgr_draw_begin(r300->vbuf_mgr, &info,
- &buffers_updated, &uploader_flushed);
- if (buffers_updated) {
+ if (u_vbuf_mgr_draw_begin(r300->vbuf_mgr, &info) & U_VBUF_BUFFERS_UPDATED) {
r300->vertex_arrays_dirty = TRUE;
}
@@ -842,7 +841,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
r300_update_derived_state(r300);
r300_reserve_cs_dwords(r300,
- PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL |
+ PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL |
(indexed ? PREP_INDEXED : 0),
indexed ? 256 : 6);
@@ -1024,12 +1023,12 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
if (r300->draw_first_emitted) {
if (!r300_prepare_for_rendering(r300,
- PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL,
+ PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL,
NULL, dwords, 0, 0, -1))
return;
} else {
if (!r300_emit_states(r300,
- PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL,
+ PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL,
NULL, 0, 0, -1))
return;
}
@@ -1064,12 +1063,12 @@ static void r300_render_draw_elements(struct vbuf_render* render,
if (r300->draw_first_emitted) {
if (!r300_prepare_for_rendering(r300,
- PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
+ PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED,
NULL, 256, 0, 0, -1))
return;
} else {
if (!r300_emit_states(r300,
- PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
+ PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED,
NULL, 0, 0, -1))
return;
}
@@ -1106,7 +1105,7 @@ static void r300_render_draw_elements(struct vbuf_render* render,
if (count) {
if (!r300_prepare_for_rendering(r300,
- PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
+ PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED,
NULL, 256, 0, 0, -1))
return;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 7127ea1ac16..057cd9faf03 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1307,7 +1307,7 @@ static void*
sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter,
state->mag_img_filter,
state->min_mip_filter,
- state->max_anisotropy > 0);
+ state->max_anisotropy > 1);
sampler->filter0 |= r300_anisotropy(state->max_anisotropy);
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 121409b2260..f63114e7eb7 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -854,6 +854,12 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
}
+ /* The hardware doesn't like CLAMP and CLAMP_TO_BORDER
+ * for the 3rd coordinate if the texture isn't 3D. */
+ if (tex->b.b.b.target != PIPE_TEXTURE_3D) {
+ texstate->filter0 &= ~R300_TX_WRAP_R_MASK;
+ }
+
if (tex->tex.is_npot) {
/* NPOT textures don't support mip filter, unfortunately.
* This prevents incorrect rendering. */
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 54dae1acd98..62c03b3909b 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -261,51 +261,49 @@ static INLINE uint32_t r300_translate_wrap(int wrap)
}
static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip,
- int is_anisotropic)
+ boolean is_anisotropic)
{
uint32_t retval = 0;
- if (is_anisotropic)
- retval |= R300_TX_MIN_FILTER_ANISO | R300_TX_MAG_FILTER_ANISO;
- else {
- switch (min) {
- case PIPE_TEX_FILTER_NEAREST:
- retval |= R300_TX_MIN_FILTER_NEAREST;
- break;
- case PIPE_TEX_FILTER_LINEAR:
- retval |= R300_TX_MIN_FILTER_LINEAR;
- break;
- default:
- fprintf(stderr, "r300: Unknown texture filter %d\n", min);
- assert(0);
- break;
- }
- switch (mag) {
- case PIPE_TEX_FILTER_NEAREST:
- retval |= R300_TX_MAG_FILTER_NEAREST;
- break;
- case PIPE_TEX_FILTER_LINEAR:
- retval |= R300_TX_MAG_FILTER_LINEAR;
- break;
- default:
- fprintf(stderr, "r300: Unknown texture filter %d\n", mag);
- assert(0);
- break;
- }
+
+ switch (min) {
+ case PIPE_TEX_FILTER_NEAREST:
+ retval |= R300_TX_MIN_FILTER_NEAREST;
+ break;
+ case PIPE_TEX_FILTER_LINEAR:
+ retval |= is_anisotropic ? R300_TX_MIN_FILTER_ANISO :
+ R300_TX_MIN_FILTER_LINEAR;
+ break;
+ default:
+ fprintf(stderr, "r300: Unknown texture filter %d\n", min);
+ assert(0);
}
+
+ switch (mag) {
+ case PIPE_TEX_FILTER_NEAREST:
+ retval |= R300_TX_MAG_FILTER_NEAREST;
+ break;
+ case PIPE_TEX_FILTER_LINEAR:
+ retval |= is_anisotropic ? R300_TX_MAG_FILTER_ANISO :
+ R300_TX_MAG_FILTER_LINEAR;
+ break;
+ default:
+ fprintf(stderr, "r300: Unknown texture filter %d\n", mag);
+ assert(0);
+ }
+
switch (mip) {
- case PIPE_TEX_MIPFILTER_NONE:
- retval |= R300_TX_MIN_FILTER_MIP_NONE;
- break;
- case PIPE_TEX_MIPFILTER_NEAREST:
- retval |= R300_TX_MIN_FILTER_MIP_NEAREST;
- break;
- case PIPE_TEX_MIPFILTER_LINEAR:
- retval |= R300_TX_MIN_FILTER_MIP_LINEAR;
- break;
- default:
- fprintf(stderr, "r300: Unknown texture filter %d\n", mip);
- assert(0);
- break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ retval |= R300_TX_MIN_FILTER_MIP_NONE;
+ break;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ retval |= R300_TX_MIN_FILTER_MIP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ retval |= R300_TX_MIN_FILTER_MIP_LINEAR;
+ break;
+ default:
+ fprintf(stderr, "r300: Unknown texture filter %d\n", mip);
+ assert(0);
}
return retval;
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 38ca9a24e45..62c2f1fff6c 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -447,16 +447,8 @@ static uint32_t r300_translate_colorformat(enum pipe_format format)
/*case PIPE_FORMAT_B8G8R8A8_SNORM:*/
case PIPE_FORMAT_B8G8R8X8_UNORM:
/*case PIPE_FORMAT_B8G8R8X8_SNORM:*/
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/
- case PIPE_FORMAT_X8R8G8B8_UNORM:
- /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/
- case PIPE_FORMAT_A8B8G8R8_UNORM:
- /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/
case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8A8_SNORM:
- case PIPE_FORMAT_X8B8G8R8_UNORM:
- /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/
case PIPE_FORMAT_R8G8B8X8_UNORM:
/*case PIPE_FORMAT_R8G8B8X8_SNORM:*/
/* These formats work fine with ARGB8888 if US_OUT_FMT is set
@@ -662,10 +654,6 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
R300_C2_SEL_R | R300_C3_SEL_A;
/* ARGB outputs. */
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/
- case PIPE_FORMAT_X8R8G8B8_UNORM:
- /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/
case PIPE_FORMAT_A16_UNORM:
case PIPE_FORMAT_A16_SNORM:
case PIPE_FORMAT_A16_FLOAT:
@@ -674,15 +662,6 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
R300_C0_SEL_A | R300_C1_SEL_R |
R300_C2_SEL_G | R300_C3_SEL_B;
- /* ABGR outputs. */
- case PIPE_FORMAT_A8B8G8R8_UNORM:
- /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/
- case PIPE_FORMAT_X8B8G8R8_UNORM:
- /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/
- return modifier |
- R300_C0_SEL_A | R300_C1_SEL_B |
- R300_C2_SEL_G | R300_C3_SEL_R;
-
/* RGBA outputs. */
case PIPE_FORMAT_R8G8B8X8_UNORM:
/*case PIPE_FORMAT_R8G8B8X8_SNORM:*/
diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript
index 0135808f10a..19f07b2bef8 100644
--- a/src/gallium/drivers/r600/SConscript
+++ b/src/gallium/drivers/r600/SConscript
@@ -2,11 +2,7 @@ Import('*')
env = env.Clone()
-try:
- env.ParseConfig('pkg-config --cflags libdrm_radeon')
-except OSError:
- print 'warning: not building r600'
- Return()
+env.PkgUseModules('DRM_RADEON')
env.Append(CPPPATH = [
'#/include',
diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h
index b780dba3e33..b5590116e8f 100644
--- a/src/gallium/drivers/r600/eg_state_inlines.h
+++ b/src/gallium/drivers/r600/eg_state_inlines.h
@@ -292,7 +292,7 @@ static inline uint32_t r600_translate_stencilformat(enum pipe_format format)
static inline uint32_t r600_translate_colorswap(enum pipe_format format)
{
switch (format) {
- /* 8-bit buffers. */
+ /* 8-bit buffers. */
case PIPE_FORMAT_L4A4_UNORM:
return V_028C70_SWAP_ALT;
@@ -305,7 +305,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_R8_SNORM:
return V_028C70_SWAP_STD;
- /* 16-bit buffers. */
+ /* 16-bit buffers. */
case PIPE_FORMAT_B5G6R5_UNORM:
return V_028C70_SWAP_STD_REV;
@@ -327,9 +327,10 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
return V_028C70_SWAP_STD;
case PIPE_FORMAT_R16_UNORM:
+ case PIPE_FORMAT_R16_FLOAT:
return V_028C70_SWAP_STD;
- /* 32-bit buffers. */
+ /* 32-bit buffers. */
case PIPE_FORMAT_A8B8G8R8_SRGB:
return V_028C70_SWAP_STD_REV;
case PIPE_FORMAT_B8G8R8A8_SRGB:
@@ -343,6 +344,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_X8R8G8B8_UNORM:
return V_028C70_SWAP_ALT_REV;
case PIPE_FORMAT_R8G8B8A8_SNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8X8_UNORM:
return V_028C70_SWAP_STD;
@@ -373,13 +375,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_R16G16_UNORM:
return V_028C70_SWAP_STD;
- /* 64-bit buffers. */
+ /* 64-bit buffers. */
case PIPE_FORMAT_R32G32_FLOAT:
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
case PIPE_FORMAT_R16G16B16A16_FLOAT:
- /* 128-bit buffers. */
+ /* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32A32_FLOAT:
case PIPE_FORMAT_R32G32B32A32_SNORM:
case PIPE_FORMAT_R32G32B32A32_UNORM:
@@ -394,7 +396,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
{
switch (format) {
- /* 8-bit buffers. */
+ /* 8-bit buffers. */
case PIPE_FORMAT_L4A4_UNORM:
return V_028C70_COLOR_4_4;
@@ -406,7 +408,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R8_SNORM:
return V_028C70_COLOR_8;
- /* 16-bit buffers. */
+ /* 16-bit buffers. */
case PIPE_FORMAT_B5G6R5_UNORM:
return V_028C70_COLOR_5_6_5;
@@ -429,7 +431,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R16_UNORM:
return V_028C70_COLOR_16;
- /* 32-bit buffers. */
+ case PIPE_FORMAT_R16_FLOAT:
+ return V_028C70_COLOR_16_FLOAT;
+
+ /* 32-bit buffers. */
case PIPE_FORMAT_A8B8G8R8_SRGB:
case PIPE_FORMAT_A8B8G8R8_UNORM:
case PIPE_FORMAT_A8R8G8B8_UNORM:
@@ -472,7 +477,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R11G11B10_FLOAT:
return V_028C70_COLOR_10_11_11_FLOAT;
- /* 64-bit buffers. */
+ /* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16_USCALED:
case PIPE_FORMAT_R16G16B16A16_USCALED:
case PIPE_FORMAT_R16G16B16_SSCALED:
@@ -492,20 +497,21 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R32G32_SSCALED:
return V_028C70_COLOR_32_32;
- /* 128-bit buffers. */
+ /* 96-bit buffers. */
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return V_028C70_COLOR_32_32_32_FLOAT;
+
+ /* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32A32_SNORM:
case PIPE_FORMAT_R32G32B32A32_UNORM:
return V_028C70_COLOR_32_32_32_32;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- return V_028C70_COLOR_32_32_32_FLOAT;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
return V_028C70_COLOR_32_32_32_32_FLOAT;
- /* YUV buffers. */
+ /* YUV buffers. */
case PIPE_FORMAT_UYVY:
case PIPE_FORMAT_YUYV:
default:
- /* R600_ERR("unsupported color format %d\n", format); */
return ~0; /* Unsupported. */
}
}
@@ -517,11 +523,11 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
case V_028C70_COLOR_4_4:
return(ENDIAN_NONE);
- /* 8-bit buffers. */
+ /* 8-bit buffers. */
case V_028C70_COLOR_8:
return(ENDIAN_NONE);
- /* 16-bit buffers. */
+ /* 16-bit buffers. */
case V_028C70_COLOR_5_6_5:
case V_028C70_COLOR_1_5_5_5:
case V_028C70_COLOR_4_4_4_4:
@@ -529,7 +535,7 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
case V_028C70_COLOR_8_8:
return(ENDIAN_8IN16);
- /* 32-bit buffers. */
+ /* 32-bit buffers. */
case V_028C70_COLOR_8_8_8_8:
case V_028C70_COLOR_2_10_10_10:
case V_028C70_COLOR_8_24:
@@ -539,7 +545,7 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
case V_028C70_COLOR_16_16:
return(ENDIAN_8IN32);
- /* 64-bit buffers. */
+ /* 64-bit buffers. */
case V_028C70_COLOR_16_16_16_16:
case V_028C70_COLOR_16_16_16_16_FLOAT:
return(ENDIAN_8IN16);
@@ -548,8 +554,9 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
case V_028C70_COLOR_32_32:
return(ENDIAN_8IN32);
- /* 128-bit buffers. */
+ /* 96-bit buffers. */
case V_028C70_COLOR_32_32_32_FLOAT:
+ /* 128-bit buffers. */
case V_028C70_COLOR_32_32_32_32_FLOAT:
case V_028C70_COLOR_32_32_32_32:
return(ENDIAN_8IN32);
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 9ebfe54c76d..dc182611482 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -256,6 +256,8 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
}
rstate = &rs->rstate;
+ rs->clamp_vertex_color = state->clamp_vertex_color;
+ rs->clamp_fragment_color = state->clamp_fragment_color;
rs->flatshade = state->flatshade;
rs->sprite_coord_enable = state->sprite_coord_enable;
@@ -482,19 +484,27 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views;
int i;
+ int has_depth = 0;
for (i = 0; i < count; i++) {
if (&rctx->ps_samplers.views[i]->base != views[i]) {
- if (resource[i])
+ if (resource[i]) {
+ if (((struct r600_resource_texture *)resource[i]->base.texture)->depth)
+ has_depth = 1;
evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state,
i + R600_MAX_CONST_BUFFERS);
- else
+ } else
evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
i + R600_MAX_CONST_BUFFERS);
pipe_sampler_view_reference(
(struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
views[i]);
+ } else {
+ if (resource[i]) {
+ if (((struct r600_resource_texture *)resource[i]->base.texture)->depth)
+ has_depth = 1;
+ }
}
}
for (i = count; i < NUM_TEX_UNITS; i++) {
@@ -504,6 +514,7 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou
pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
}
}
+ rctx->have_depth_texture = has_depth;
rctx->ps_samplers.n_views = count;
}
@@ -689,6 +700,9 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
surf = (struct r600_surface *)state->cbufs[cb];
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
+ if (rtex->depth)
+ rctx->have_depth_fb = TRUE;
+
if (rtex->depth && !rtex->is_flushing_texture) {
r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE);
rtex = rtex->flushed_depth_texture;
@@ -870,6 +884,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
util_copy_framebuffer_state(&rctx->framebuffer, state);
/* build states */
+ rctx->have_depth_fb = 0;
+ rctx->nr_cbufs = state->nr_cbufs;
for (int i = 0; i < state->nr_cbufs; i++) {
evergreen_cb(rctx, rstate, state, i);
}
@@ -1616,7 +1632,10 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
exports_ps |= 1;
else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
- num_cout++;
+ if (rshader->fs_write_all)
+ num_cout = rshader->nr_cbufs;
+ else
+ num_cout++;
}
}
exports_ps |= S_02884C_EXPORT_COLORS(num_cout);
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index bf7138d9e4e..151e831e5c6 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -245,6 +245,7 @@ struct r600_context {
unsigned pm4_cdwords;
unsigned pm4_dirty_cdwords;
unsigned ctx_pm4_ndwords;
+ unsigned init_dwords;
unsigned nreloc;
unsigned creloc;
struct r600_reloc *reloc;
@@ -261,6 +262,7 @@ struct r600_context {
struct r600_range vs_resources;
struct r600_range fs_resources;
int num_ps_resources, num_vs_resources, num_fs_resources;
+ boolean have_depth_texture, have_depth_fb;
};
struct r600_draw {
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 3196d97dbbb..065f955ebcb 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1383,6 +1383,9 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
break;
}
}
+ /* slight hack to make gradients always go into same cf */
+ if (ntex->inst == SQ_TEX_INST_SET_GRADIENTS_H)
+ bc->force_add_cf = 1;
}
/* cf can contains only alu or only vtx or only tex */
@@ -1860,6 +1863,8 @@ void r600_bc_dump(struct r600_bc *bc)
break;
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+ case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+ case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
fprintf(stderr, "GPR:%X ", cf->output.gpr);
fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size);
@@ -2258,7 +2263,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
ve->fs_size = bc.ndw*4;
/* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
- ve->fetch_shader = r600_bo(rctx->radeon, ve->fs_size, 256, PIPE_BIND_VERTEX_BUFFER, 0);
+ ve->fetch_shader = r600_bo(rctx->radeon, ve->fs_size, 256, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE);
if (ve->fetch_shader == NULL) {
r600_bc_clear(&bc);
return -ENOMEM;
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 151f48a8bf8..6171d285bb9 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -27,9 +27,18 @@
enum r600_blitter_op /* bitmask */
{
- R600_CLEAR = 1,
- R600_CLEAR_SURFACE = 2,
- R600_COPY = 4
+ R600_SAVE_TEXTURES = 1,
+ R600_SAVE_FRAMEBUFFER = 2,
+ R600_DISABLE_RENDER_COND = 4,
+
+ R600_CLEAR = 0,
+
+ R600_CLEAR_SURFACE = R600_SAVE_FRAMEBUFFER,
+
+ R600_COPY = R600_SAVE_FRAMEBUFFER | R600_SAVE_TEXTURES |
+ R600_DISABLE_RENDER_COND,
+
+ R600_DECOMPRESS = R600_SAVE_FRAMEBUFFER | R600_DISABLE_RENDER_COND,
};
static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op)
@@ -58,10 +67,10 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
rctx->vbuf_mgr->nr_vertex_buffers,
rctx->vbuf_mgr->vertex_buffer);
- if (op & (R600_CLEAR_SURFACE | R600_COPY))
+ if (op & R600_SAVE_FRAMEBUFFER)
util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer);
- if (op & R600_COPY) {
+ if (op & R600_SAVE_TEXTURES) {
util_blitter_save_fragment_sampler_states(
rctx->blitter, rctx->ps_samplers.n_samplers,
(void**)rctx->ps_samplers.samplers);
@@ -71,11 +80,23 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
(struct pipe_sampler_view**)rctx->ps_samplers.views);
}
+ if ((op & R600_DISABLE_RENDER_COND) && rctx->current_render_cond) {
+ rctx->saved_render_cond = rctx->current_render_cond;
+ rctx->saved_render_cond_mode = rctx->current_render_cond_mode;
+ rctx->context.render_condition(&rctx->context, NULL, 0);
+ }
+
}
static void r600_blitter_end(struct pipe_context *ctx)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ if (rctx->saved_render_cond) {
+ rctx->context.render_condition(&rctx->context,
+ rctx->saved_render_cond,
+ rctx->saved_render_cond_mode);
+ rctx->saved_render_cond = NULL;
+ }
r600_context_queries_resume(&rctx->ctx);
rctx->blit = false;
}
@@ -107,7 +128,7 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t
rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635)
depth = 0.0f;
- r600_blitter_begin(ctx, R600_CLEAR_SURFACE);
+ r600_blitter_begin(ctx, R600_DECOMPRESS);
util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth);
r600_blitter_end(ctx);
@@ -121,8 +142,6 @@ void r600_flush_depth_textures(struct r600_pipe_context *rctx)
{
unsigned int i;
- if (rctx->blit) return;
-
/* FIXME: This handles fragment shader textures only. */
for (i = 0; i < rctx->ps_samplers.n_views; ++i) {
@@ -275,6 +294,8 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
{
struct r600_resource_texture *rsrc = (struct r600_resource_texture*)src;
struct texture_orig_info orig_info[2];
+ struct pipe_box sbox;
+ const struct pipe_box *psbox;
boolean restore_orig[2];
/* Fallback for buffers. */
@@ -292,7 +313,15 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
if (util_format_is_compressed(src->format)) {
r600_compressed_to_blittable(src, src_level, &orig_info[0]);
restore_orig[0] = TRUE;
- }
+ sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x);
+ sbox.y = util_format_get_nblocksy(orig_info[0].format, src_box->y);
+ sbox.z = src_box->z;
+ sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width);
+ sbox.height = util_format_get_nblocksy(orig_info[0].format, src_box->height);
+ sbox.depth = src_box->depth;
+ psbox=&sbox;
+ } else
+ psbox=src_box;
if (util_format_is_compressed(dst->format)) {
r600_compressed_to_blittable(dst, dst_level, &orig_info[1]);
@@ -303,7 +332,7 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
}
r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
- src, src_level, src_box);
+ src, src_level, psbox);
if (restore_orig[0])
r600_reset_blittable_to_compressed(src, src_level, &orig_info[0]);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 70e3619de4b..049a4daae66 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -127,9 +127,6 @@ static void r600_flush(struct pipe_context *ctx,
if (rfence)
*rfence = r600_create_fence(rctx);
- if (!rctx->ctx.pm4_cdwords)
- return;
-
#if 0
sprintf(dname, "gallium-%08d.bof", dc);
if (dc < 20) {
@@ -139,11 +136,6 @@ static void r600_flush(struct pipe_context *ctx,
dc++;
#endif
r600_context_flush(&rctx->ctx);
-
- /* XXX This shouldn't be really necessary, but removing it breaks some tests.
- * Needless buffer reallocations may significantly increase memory consumption,
- * so getting rid of this call is important. */
- u_upload_flush(rctx->vbuf_mgr->uploader);
}
static void r600_update_num_contexts(struct r600_screen *rscreen, int diff)
@@ -373,7 +365,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
- case PIPE_CAP_SM3:
case PIPE_CAP_TEXTURE_SWIZZLE:
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
case PIPE_CAP_DEPTH_CLAMP:
@@ -382,6 +373,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ case PIPE_CAP_SM3:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL:
return 1;
/* Supported except the original R600. */
@@ -391,14 +385,12 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return family == CHIP_R600 ? 0 : 1;
/* Supported on Evergreen. */
- case PIPE_CAP_SEAMLESS_CUBE_MAP:
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return family >= CHIP_CEDAR ? 1 : 0;
/* Unsupported features. */
case PIPE_CAP_STREAM_OUTPUT:
case PIPE_CAP_PRIMITIVE_RESTART:
- case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL:
case PIPE_CAP_TGSI_INSTANCEID:
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
@@ -487,9 +479,9 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
return 8; /* FIXME */
case PIPE_SHADER_CAP_MAX_INPUTS:
if(shader == PIPE_SHADER_FRAGMENT)
- return 10;
+ return 34;
else
- return 16;
+ return 32;
case PIPE_SHADER_CAP_MAX_TEMPS:
return 256; /* Max native temporaries. */
case PIPE_SHADER_CAP_MAX_ADDRS:
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index d92b74ebc4e..2667c80bcef 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -50,6 +50,7 @@ enum r600_pipe_state_id {
R600_PIPE_STATE_BLEND = 0,
R600_PIPE_STATE_BLEND_COLOR,
R600_PIPE_STATE_CONFIG,
+ R600_PIPE_STATE_SEAMLESS_CUBEMAP,
R600_PIPE_STATE_CLIP,
R600_PIPE_STATE_SCISSOR,
R600_PIPE_STATE_VIEWPORT,
@@ -87,6 +88,8 @@ struct r600_pipe_sampler_view {
struct r600_pipe_rasterizer {
struct r600_pipe_state rstate;
+ boolean clamp_vertex_color;
+ boolean clamp_fragment_color;
boolean flatshade;
unsigned sprite_coord_enable;
float offset_units;
@@ -124,6 +127,12 @@ struct r600_pipe_shader {
struct r600_bo *bo;
struct r600_bo *bo_fetch;
struct r600_vertex_element vertex_elements;
+ struct tgsi_token *tokens;
+};
+
+struct r600_pipe_sampler_state {
+ struct r600_pipe_state rstate;
+ boolean seamless_cube_map;
};
/* needed for blitter save */
@@ -191,12 +200,20 @@ struct r600_pipe_context {
struct r600_pipe_rasterizer *rasterizer;
struct r600_pipe_state vgt;
struct r600_pipe_state spi;
+ struct pipe_query *current_render_cond;
+ unsigned current_render_cond_mode;
+ struct pipe_query *saved_render_cond;
+ unsigned saved_render_cond_mode;
/* shader information */
+ boolean clamp_vertex_color;
+ boolean clamp_fragment_color;
+ boolean spi_dirty;
unsigned sprite_coord_enable;
boolean flatshade;
boolean export_16bpc;
unsigned alpha_ref;
boolean alpha_ref_dirty;
+ unsigned nr_cbufs;
struct r600_textures_info ps_samplers;
struct r600_pipe_fences fences;
@@ -204,7 +221,9 @@ struct r600_pipe_context {
struct u_vbuf_mgr *vbuf_mgr;
struct util_slab_mempool pool_transfers;
boolean blit;
+ boolean have_depth_texture, have_depth_fb;
+ unsigned default_ps_gprs, default_vs_gprs;
};
struct r600_drawl {
@@ -252,7 +271,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx);
void r600_init_context_resource_functions(struct r600_pipe_context *r600);
/* r600_shader.c */
-int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens);
+int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader);
int r600_find_vs_semantic_index(struct r600_shader *vs,
struct r600_shader *ps, int id);
@@ -270,6 +289,7 @@ void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
struct r600_resource *rbuffer,
unsigned offset, unsigned stride);
+void r600_adjust_gprs(struct r600_pipe_context *rctx);
/* r600_texture.c */
void r600_init_screen_texture_functions(struct pipe_screen *screen);
diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c
index 181ea3f9e49..bedb48b6031 100644
--- a/src/gallium/drivers/r600/r600_query.c
+++ b/src/gallium/drivers/r600/r600_query.c
@@ -75,6 +75,9 @@ static void r600_render_condition(struct pipe_context *ctx,
struct r600_query *rquery = (struct r600_query *)query;
int wait_flag = 0;
+ rctx->current_render_cond = query;
+ rctx->current_render_cond_mode = mode;
+
if (!query) {
rctx->ctx.predicate_drawing = false;
r600_query_predication(&rctx->ctx, NULL, PREDICATION_OP_CLEAR, 1);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 39e6d85d7b4..f83d7079b29 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -63,10 +63,6 @@ int r600_find_vs_semantic_index(struct r600_shader *vs,
{
struct r600_shader_io *input = &ps->input[id];
- /* position/face doesn't get/need a semantic index */
- if (input->name == TGSI_SEMANTIC_POSITION || input->name == TGSI_SEMANTIC_FACE)
- return 0;
-
for (int i = 0; i < vs->noutput; i++) {
if (input->name == vs->output[i].name &&
input->sid == vs->output[i].sid) {
@@ -85,7 +81,8 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s
/* copy new shader */
if (shader->bo == NULL) {
- shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
+ /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
+ shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE);
if (shader->bo == NULL) {
return -ENOMEM;
}
@@ -121,9 +118,9 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s
return 0;
}
-static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader);
-int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
+int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
static int dump_shaders = -1;
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
@@ -136,10 +133,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s
if (dump_shaders) {
fprintf(stderr, "--------------------------------------------------------------\n");
- tgsi_dump(tokens, 0);
+ tgsi_dump(shader->tokens, 0);
}
shader->shader.family = r600_get_family(rctx->radeon);
- r = r600_shader_from_tgsi(tokens, &shader->shader);
+ r = r600_shader_from_tgsi(rctx, shader);
if (r) {
R600_ERR("translation from TGSI failed !\n");
return r;
@@ -162,6 +159,8 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader
r600_bo_reference(rctx->radeon, &shader->bo, NULL);
r600_bc_clear(&shader->shader.bc);
+
+ memset(&shader->shader,0,sizeof(struct r600_shader));
}
/*
@@ -189,7 +188,7 @@ struct r600_shader_ctx {
struct r600_shader_tgsi_instruction *inst_info;
struct r600_bc *bc;
struct r600_shader *shader;
- struct r600_shader_src src[3];
+ struct r600_shader_src src[4];
u32 *literals;
u32 nliterals;
u32 max_driver_temp_used;
@@ -597,15 +596,17 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
return 0;
}
-static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
+static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader)
{
+ struct r600_shader *shader = &pipeshader->shader;
+ struct tgsi_token *tokens = pipeshader->tokens;
struct tgsi_full_immediate *immediate;
struct tgsi_full_property *property;
struct r600_shader_ctx ctx;
struct r600_bc_output output[32];
unsigned output_done, noutput;
unsigned opcode;
- int i, r = 0, pos0;
+ int i, j, r = 0, pos0;
ctx.bc = &shader->bc;
ctx.shader = shader;
@@ -619,6 +620,11 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
shader->processor_type = ctx.type;
ctx.bc->type = shader->processor_type;
+ shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) ||
+ ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color));
+
+ shader->nr_cbufs = rctx->nr_cbufs;
+
/* register allocations */
/* Values [0,127] correspond to GPR[0..127].
* Values [128,159] correspond to constant buffer bank 0
@@ -728,52 +734,103 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
goto out_err;
}
}
- /* export output */
+
noutput = shader->noutput;
+
+ /* clamp color outputs */
+ if (shader->clamp_color) {
+ for (i = 0; i < noutput; i++) {
+ if (shader->output[i].name == TGSI_SEMANTIC_COLOR ||
+ shader->output[i].name == TGSI_SEMANTIC_BCOLOR) {
+
+ int j;
+ for (j = 0; j < 4; j++) {
+ struct r600_bc_alu alu;
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ /* MOV_SAT R, R */
+ alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+ alu.dst.sel = shader->output[i].gpr;
+ alu.dst.chan = j;
+ alu.dst.write = 1;
+ alu.dst.clamp = 1;
+ alu.src[0].sel = alu.dst.sel;
+ alu.src[0].chan = j;
+
+ if (j == 3) {
+ alu.last = 1;
+ }
+ r = r600_bc_add_alu(ctx.bc, &alu);
+ if (r)
+ return r;
+ }
+ }
+ }
+ }
+
+ /* export output */
+ j = 0;
for (i = 0, pos0 = 0; i < noutput; i++) {
memset(&output[i], 0, sizeof(struct r600_bc_output));
- output[i].gpr = shader->output[i].gpr;
- output[i].elem_size = 3;
- output[i].swizzle_x = 0;
- output[i].swizzle_y = 1;
- output[i].swizzle_z = 2;
- output[i].swizzle_w = 3;
- output[i].burst_count = 1;
- output[i].barrier = 1;
- output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
- output[i].array_base = i - pos0;
- output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
+ output[i + j].gpr = shader->output[i].gpr;
+ output[i + j].elem_size = 3;
+ output[i + j].swizzle_x = 0;
+ output[i + j].swizzle_y = 1;
+ output[i + j].swizzle_z = 2;
+ output[i + j].swizzle_w = 3;
+ output[i + j].burst_count = 1;
+ output[i + j].barrier = 1;
+ output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ output[i + j].array_base = i - pos0;
+ output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
switch (ctx.type) {
case TGSI_PROCESSOR_VERTEX:
if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
- output[i].array_base = 60;
- output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ output[i + j].array_base = 60;
+ output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
/* position doesn't count in array_base */
pos0++;
}
if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
- output[i].array_base = 61;
- output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ output[i + j].array_base = 61;
+ output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
/* position doesn't count in array_base */
pos0++;
}
break;
case TGSI_PROCESSOR_FRAGMENT:
if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
- output[i].array_base = shader->output[i].sid;
- output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ output[i + j].array_base = shader->output[i].sid;
+ output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ if (shader->fs_write_all && (shader->family >= CHIP_CEDAR)) {
+ for (j = 1; j < shader->nr_cbufs; j++) {
+ memset(&output[i + j], 0, sizeof(struct r600_bc_output));
+ output[i + j].gpr = shader->output[i].gpr;
+ output[i + j].elem_size = 3;
+ output[i + j].swizzle_x = 0;
+ output[i + j].swizzle_y = 1;
+ output[i + j].swizzle_z = 2;
+ output[i + j].swizzle_w = 3;
+ output[i + j].burst_count = 1;
+ output[i + j].barrier = 1;
+ output[i + j].array_base = shader->output[i].sid + j;
+ output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
+ output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ }
+ j--;
+ }
} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
- output[i].array_base = 61;
- output[i].swizzle_x = 2;
- output[i].swizzle_y = 7;
- output[i].swizzle_z = output[i].swizzle_w = 7;
- output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ output[i + j].array_base = 61;
+ output[i + j].swizzle_x = 2;
+ output[i + j].swizzle_y = 7;
+ output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
+ output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
- output[i].array_base = 61;
- output[i].swizzle_x = 7;
- output[i].swizzle_y = 1;
- output[i].swizzle_z = output[i].swizzle_w = 7;
- output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ output[i + j].array_base = 61;
+ output[i + j].swizzle_x = 7;
+ output[i + j].swizzle_y = 1;
+ output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
+ output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else {
R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
r = -EINVAL;
@@ -786,6 +843,7 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
goto out_err;
}
}
+ noutput += j;
/* add fake param output for vertex shader if no param is exported */
if (ctx.type == TGSI_PROCESSOR_VERTEX) {
for (i = 0, pos0 = 0; i < noutput; i++) {
@@ -1306,41 +1364,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
struct r600_bc_alu alu;
int r;
- /* dst.x, <- 1.0 */
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
- alu.src[0].chan = 0;
- tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
- alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
-
- /* dst.y = max(src.x, 0.0) */
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
- r600_bc_src(&alu.src[0], &ctx->src[0], 0);
- alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
- alu.src[1].chan = 0;
- tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
- alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
-
- /* dst.w, <- 1.0 */
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = V_SQ_ALU_SRC_1;
- alu.src[0].chan = 0;
- tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
- alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
-
if (inst->Dst[0].Register.WriteMask & (1 << 2))
{
int chan;
@@ -1369,7 +1392,9 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
r600_bc_src(&alu.src[0], &ctx->src[0], 1);
- tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 2;
+ alu.dst.write = 1;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
@@ -1426,6 +1451,42 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
return r;
}
}
+
+ /* dst.x, <- 1.0 */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+ alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
+ alu.src[0].chan = 0;
+ tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
+ alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ /* dst.y = max(src.x, 0.0) */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
+ alu.src[1].chan = 0;
+ tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
+ alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ /* dst.w, <- 1.0 */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
+ alu.src[0].chan = 0;
+ tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
+ alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
return 0;
}
@@ -1748,6 +1809,22 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
return 0;
}
+static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
+ unsigned index)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
+ inst->Src[index].Register.File != TGSI_FILE_INPUT) ||
+ ctx->src[index].neg || ctx->src[index].abs;
+}
+
+static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
+ unsigned index)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
+}
+
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
static float one_point_five = 1.5f;
@@ -1755,19 +1832,70 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
struct r600_bc_tex tex;
struct r600_bc_alu alu;
unsigned src_gpr;
- int r, i;
+ int r, i, j;
int opcode;
/* Texture fetch instructions can only use gprs as source.
* Also they cannot negate the source or take the absolute value */
- const boolean src_requires_loading =
- (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
- inst->Src[0].Register.File != TGSI_FILE_INPUT) ||
- ctx->src[0].neg || ctx->src[0].abs;
+ const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
boolean src_loaded = FALSE;
+ unsigned sampler_src_reg = 1;
+
+ src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
+ /* TGSI moves the sampler to src reg 3 for TXD */
+ sampler_src_reg = 3;
+
+ for (i = 1; i < 3; i++) {
+ /* set gradients h/v */
+ memset(&tex, 0, sizeof(struct r600_bc_tex));
+ tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
+ SQ_TEX_INST_SET_GRADIENTS_V;
+ tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
+ tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
+
+ if (tgsi_tex_src_requires_loading(ctx, i)) {
+ tex.src_gpr = r600_get_temp(ctx);
+ tex.src_sel_x = 0;
+ tex.src_sel_y = 1;
+ tex.src_sel_z = 2;
+ tex.src_sel_w = 3;
+
+ for (j = 0; j < 4; j++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+ r600_bc_src(&alu.src[0], &ctx->src[i], j);
+ alu.dst.sel = tex.src_gpr;
+ alu.dst.chan = j;
+ if (j == 3)
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
- src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
+ } else {
+ tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
+ tex.src_sel_x = ctx->src[i].swizzle[0];
+ tex.src_sel_y = ctx->src[i].swizzle[1];
+ tex.src_sel_z = ctx->src[i].swizzle[2];
+ tex.src_sel_w = ctx->src[i].swizzle[3];
+ tex.src_rel = ctx->src[i].rel;
+ }
+ tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
+ tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
+ if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
+ tex.coord_type_x = 1;
+ tex.coord_type_y = 1;
+ tex.coord_type_z = 1;
+ tex.coord_type_w = 1;
+ }
+ r = r600_bc_add_tex(ctx->bc, &tex);
+ if (r)
+ return r;
+ }
+ } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
int out_chan;
/* Add perspective divide */
if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
@@ -1954,13 +2082,24 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
}
opcode = ctx->inst_info->r600_opcode;
- if (opcode == SQ_TEX_INST_SAMPLE &&
- (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
- opcode = SQ_TEX_INST_SAMPLE_C;
+ if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) {
+ switch (opcode) {
+ case SQ_TEX_INST_SAMPLE:
+ opcode = SQ_TEX_INST_SAMPLE_C;
+ break;
+ case SQ_TEX_INST_SAMPLE_L:
+ opcode = SQ_TEX_INST_SAMPLE_C_L;
+ break;
+ case SQ_TEX_INST_SAMPLE_G:
+ opcode = SQ_TEX_INST_SAMPLE_C_G;
+ break;
+ }
+ }
memset(&tex, 0, sizeof(struct r600_bc_tex));
tex.inst = opcode;
- tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
+
+ tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
tex.src_gpr = src_gpr;
tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
@@ -3085,7 +3224,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
{TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
- {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
{TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
{TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
@@ -3191,7 +3330,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
{TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
{TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
- {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
+ {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
{TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
{TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
{TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
@@ -3243,7 +3382,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
{TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
- {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
{TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
{TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
@@ -3401,7 +3540,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
{TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
{TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
- {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
{TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
{TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 8f96ce5085c..76aebf2b1ea 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -46,6 +46,8 @@ struct r600_shader {
enum radeon_family family;
boolean uses_kill;
boolean fs_write_all;
+ boolean clamp_color;
+ unsigned nr_cbufs;
};
#endif
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index a6cfa704ca5..7c1976f12e0 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -299,6 +299,8 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
}
rstate = &rs->rstate;
+ rs->clamp_vertex_color = state->clamp_vertex_color;
+ rs->clamp_fragment_color = state->clamp_fragment_color;
rs->flatshade = state->flatshade;
rs->sprite_coord_enable = state->sprite_coord_enable;
@@ -374,14 +376,17 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
static void *r600_create_sampler_state(struct pipe_context *ctx,
const struct pipe_sampler_state *state)
{
- struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
+ struct r600_pipe_sampler_state *ss = CALLOC_STRUCT(r600_pipe_sampler_state);
+ struct r600_pipe_state *rstate;
union util_color uc;
unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 4 : 0;
- if (rstate == NULL) {
+ if (ss == NULL) {
return NULL;
}
+ ss->seamless_cube_map = state->seamless_cube_map;
+ rstate = &ss->rstate;
rstate->id = R600_PIPE_STATE_SAMPLER;
util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
r600_pipe_state_add_reg_noblock(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0,
@@ -412,7 +417,6 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
struct pipe_resource *texture,
const struct pipe_sampler_view *state)
{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view);
struct r600_pipe_resource_state *rstate;
const struct util_format_description *desc;
@@ -422,7 +426,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
uint32_t word4 = 0, yuv_format = 0, pitch = 0;
unsigned char swizzle[4], array_mode = 0, tile_type = 0;
struct r600_bo *bo[2];
- unsigned height, depth;
+ unsigned width, height, depth, offset_level, last_level;
if (resource == NULL)
return NULL;
@@ -448,7 +452,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
}
desc = util_format_description(state->format);
if (desc == NULL) {
- R600_ERR("unknow format %d\n", state->format);
+ R600_ERR("unknown format %d\n", state->format);
}
tmp = (struct r600_resource_texture *)texture;
if (tmp->depth && !tmp->is_flushing_texture) {
@@ -464,12 +468,18 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
rbuffer = &tmp->resource;
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
- pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8);
- array_mode = tmp->array_mode[0];
+
+ offset_level = state->u.tex.first_level;
+ last_level = state->u.tex.last_level - offset_level;
+ width = u_minify(texture->width0, offset_level);
+ height = u_minify(texture->height0, offset_level);
+ depth = u_minify(texture->depth0, offset_level);
+
+ pitch = align(tmp->pitch_in_blocks[offset_level] *
+ util_format_get_blockwidth(state->format), 8);
+ array_mode = tmp->array_mode[offset_level];
tile_type = tmp->tile_type;
- height = texture->height0;
- depth = texture->depth0;
if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
height = 1;
depth = texture->array_size;
@@ -484,18 +494,18 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
S_038000_TILE_MODE(array_mode) |
S_038000_TILE_TYPE(tile_type) |
S_038000_PITCH((pitch / 8) - 1) |
- S_038000_TEX_WIDTH(texture->width0 - 1));
+ S_038000_TEX_WIDTH(width - 1));
rstate->val[1] = (S_038004_TEX_HEIGHT(height - 1) |
S_038004_TEX_DEPTH(depth - 1) |
S_038004_DATA_FORMAT(format));
- rstate->val[2] = (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8;
- rstate->val[3] = (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8;
+ rstate->val[2] = (tmp->offset[offset_level] + r600_bo_offset(bo[0])) >> 8;
+ rstate->val[3] = (tmp->offset[offset_level+1] + r600_bo_offset(bo[1])) >> 8;
rstate->val[4] = (word4 |
S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
S_038010_REQUEST_SIZE(1) |
S_038010_ENDIAN_SWAP(endian) |
- S_038010_BASE_LEVEL(state->u.tex.first_level));
- rstate->val[5] = (S_038014_LAST_LEVEL(state->u.tex.last_level) |
+ S_038010_BASE_LEVEL(0));
+ rstate->val[5] = (S_038014_LAST_LEVEL(last_level) |
S_038014_BASE_ARRAY(state->u.tex.first_layer) |
S_038014_LAST_ARRAY(state->u.tex.last_layer));
rstate->val[6] = (S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE) |
@@ -524,13 +534,16 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views;
int i;
+ int has_depth = 0;
for (i = 0; i < count; i++) {
if (&rctx->ps_samplers.views[i]->base != views[i]) {
- if (resource[i])
+ if (resource[i]) {
+ if (((struct r600_resource_texture *)resource[i]->base.texture)->depth)
+ has_depth = 1;
r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state,
i + R600_MAX_CONST_BUFFERS);
- else
+ } else
r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
i + R600_MAX_CONST_BUFFERS);
@@ -538,6 +551,11 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
(struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
views[i]);
+ } else {
+ if (resource[i]) {
+ if (((struct r600_resource_texture *)resource[i]->base.texture)->depth)
+ has_depth = 1;
+ }
}
}
for (i = count; i < NUM_TEX_UNITS; i++) {
@@ -547,30 +565,61 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
}
}
+ rctx->have_depth_texture = has_depth;
rctx->ps_samplers.n_views = count;
}
+static void r600_set_seamless_cubemap(struct r600_pipe_context *rctx, boolean enable)
+{
+ struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
+ if (rstate == NULL)
+ return;
+
+ rstate->id = R600_PIPE_STATE_SEAMLESS_CUBEMAP;
+ r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX,
+ (enable ? 0 : S_009508_DISABLE_CUBE_WRAP(1)),
+ 1, NULL);
+
+ free(rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP]);
+ rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP] = rstate;
+ r600_context_pipe_state_set(&rctx->ctx, rstate);
+}
+
static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
+ struct r600_pipe_sampler_state **sstates = (struct r600_pipe_sampler_state **)states;
+ int seamless = -1;
memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count);
rctx->ps_samplers.n_samplers = count;
for (int i = 0; i < count; i++) {
- r600_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i);
+ r600_context_pipe_state_set_ps_sampler(&rctx->ctx, &sstates[i]->rstate, i);
+
+ if (sstates[i])
+ seamless = sstates[i]->seamless_cube_map;
}
+
+ if (seamless != -1)
+ r600_set_seamless_cubemap(rctx, seamless);
}
static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void **states)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
+ struct r600_pipe_sampler_state **sstates = (struct r600_pipe_sampler_state **)states;
+ int seamless = -1;
for (int i = 0; i < count; i++) {
- r600_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i);
+ r600_context_pipe_state_set_vs_sampler(&rctx->ctx, &sstates[i]->rstate, i);
+
+ if (sstates[i])
+ seamless = sstates[i]->seamless_cube_map;
}
+
+ if (seamless != -1)
+ r600_set_seamless_cubemap(rctx, seamless);
}
static void r600_set_clip_state(struct pipe_context *ctx,
@@ -730,6 +779,9 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
surf = (struct r600_surface *)state->cbufs[cb];
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
+ if (rtex->depth)
+ rctx->have_depth_fb = TRUE;
+
if (rtex->depth && !rtex->is_flushing_texture) {
r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE);
rtex = rtex->flushed_depth_texture;
@@ -892,6 +944,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
util_copy_framebuffer_state(&rctx->framebuffer, state);
/* build states */
+ rctx->have_depth_fb = 0;
for (int i = 0; i < state->nr_cbufs; i++) {
r600_cb(rctx, rstate, state, i);
}
@@ -1031,6 +1084,46 @@ void r600_init_state_functions(struct r600_pipe_context *rctx)
rctx->context.texture_barrier = r600_texture_barrier;
}
+void r600_adjust_gprs(struct r600_pipe_context *rctx)
+{
+ enum radeon_family family;
+ struct r600_pipe_state rstate;
+ unsigned num_ps_gprs = rctx->default_ps_gprs;
+ unsigned num_vs_gprs = rctx->default_vs_gprs;
+ unsigned tmp;
+ int diff;
+
+ family = r600_get_family(rctx->radeon);
+
+ if (family >= CHIP_CEDAR)
+ return;
+
+ if (!rctx->ps_shader && !rctx->vs_shader)
+ return;
+
+ if (rctx->ps_shader->shader.bc.ngpr > rctx->default_ps_gprs)
+ {
+ diff = rctx->ps_shader->shader.bc.ngpr - rctx->default_ps_gprs;
+ num_vs_gprs -= diff;
+ num_ps_gprs += diff;
+ }
+
+ if (rctx->vs_shader->shader.bc.ngpr > rctx->default_vs_gprs)
+ {
+ diff = rctx->vs_shader->shader.bc.ngpr - rctx->default_vs_gprs;
+ num_ps_gprs -= diff;
+ num_vs_gprs += diff;
+ }
+
+ tmp = 0;
+ tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
+ tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
+ rstate.nregs = 0;
+ r600_pipe_state_add_reg(&rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0x0FFFFFFF, NULL);
+
+ r600_context_pipe_state_set(&rctx->ctx, &rstate);
+}
+
void r600_init_config(struct r600_pipe_context *rctx)
{
int ps_prio;
@@ -1173,6 +1266,9 @@ void r600_init_config(struct r600_pipe_context *rctx)
break;
}
+ rctx->default_ps_gprs = num_ps_gprs;
+ rctx->default_vs_gprs = num_vs_gprs;
+
rstate->id = R600_PIPE_STATE_CONFIG;
/* SQ_CONFIG */
@@ -1206,7 +1302,7 @@ void r600_init_config(struct r600_pipe_context *rctx)
/* SQ_GPR_RESOURCE_MGMT_2 */
tmp = 0;
tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
- tmp |= S_008C08_NUM_GS_GPRS(num_es_gprs);
+ tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
/* SQ_THREAD_RESOURCE_MGMT */
@@ -1234,14 +1330,22 @@ void r600_init_config(struct r600_pipe_context *rctx)
if (family >= CHIP_RV770) {
r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000002, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX,
+ S_009508_DISABLE_CUBE_ANISO(1) |
+ S_009508_SYNC_GRADIENT(1) |
+ S_009508_SYNC_WALKER(1) |
+ S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL);
} else {
r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000003, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX,
+ S_009508_DISABLE_CUBE_ANISO(1) |
+ S_009508_SYNC_GRADIENT(1) |
+ S_009508_SYNC_WALKER(1) |
+ S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index a670ac02be2..d9140403e5a 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -28,6 +28,7 @@
#include <util/u_format.h>
#include <pipebuffer/pb_buffer.h>
#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
#include "r600_formats.h"
#include "r600_pipe.h"
#include "r600d.h"
@@ -99,6 +100,8 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state)
if (state == NULL)
return;
+ rctx->clamp_vertex_color = rs->clamp_vertex_color;
+ rctx->clamp_fragment_color = rs->clamp_fragment_color;
rctx->flatshade = rs->flatshade;
rctx->sprite_coord_enable = rs->sprite_coord_enable;
rctx->rasterizer = rs;
@@ -112,7 +115,7 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state)
r600_polygon_offset_update(rctx);
}
if (rctx->ps_shader && rctx->vs_shader)
- r600_spi_update(rctx);
+ rctx->spi_dirty = true;
}
void r600_delete_rs_state(struct pipe_context *ctx, void *state)
@@ -257,7 +260,9 @@ void *r600_create_shader_state(struct pipe_context *ctx,
struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader);
int r;
- r = r600_pipe_shader_create(ctx, shader, state->tokens);
+ shader->tokens = tgsi_dup_tokens(state->tokens);
+
+ r = r600_pipe_shader_create(ctx, shader);
if (r) {
return NULL;
}
@@ -273,8 +278,10 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state)
if (state) {
r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_shader->rstate);
}
- if (rctx->ps_shader && rctx->vs_shader)
- r600_spi_update(rctx);
+ if (rctx->ps_shader && rctx->vs_shader) {
+ rctx->spi_dirty = true;
+ r600_adjust_gprs(rctx);
+ }
}
void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
@@ -286,8 +293,10 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
if (state) {
r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_shader->rstate);
}
- if (rctx->ps_shader && rctx->vs_shader)
- r600_spi_update(rctx);
+ if (rctx->ps_shader && rctx->vs_shader) {
+ rctx->spi_dirty = true;
+ r600_adjust_gprs(rctx);
+ }
}
void r600_delete_ps_shader(struct pipe_context *ctx, void *state)
@@ -299,6 +308,7 @@ void r600_delete_ps_shader(struct pipe_context *ctx, void *state)
rctx->ps_shader = NULL;
}
+ free(shader->tokens);
r600_pipe_shader_destroy(ctx, shader);
free(shader);
}
@@ -312,6 +322,7 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state)
rctx->vs_shader = NULL;
}
+ free(shader->tokens);
r600_pipe_shader_destroy(ctx, shader);
free(shader);
}
@@ -347,14 +358,23 @@ static void r600_spi_update(struct r600_pipe_context *rctx)
struct r600_pipe_shader *shader = rctx->ps_shader;
struct r600_pipe_state *rstate = &rctx->spi;
struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp;
+ unsigned i, tmp, sid;
if (rctx->spi.id == 0)
r600_spi_block_init(rctx, &rctx->spi);
rstate->nregs = 0;
for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
+ if (rshader->input[i].name == TGSI_SEMANTIC_POSITION ||
+ rshader->input[i].name == TGSI_SEMANTIC_FACE)
+ if (rctx->family >= CHIP_CEDAR)
+ continue;
+ else
+ sid=0;
+ else
+ sid=r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i);
+
+ tmp = S_028644_SEMANTIC(sid);
if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
@@ -378,6 +398,7 @@ static void r600_spi_update(struct r600_pipe_context *rctx)
r600_pipe_state_mod_reg(rstate, tmp);
}
+ rctx->spi_dirty = false;
r600_context_pipe_state_set(&rctx->ctx, rstate);
}
@@ -517,21 +538,39 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
}
}
+static int r600_shader_rebuild(struct pipe_context * ctx, struct r600_pipe_shader * shader)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ int r;
+
+ r600_pipe_shader_destroy(ctx, shader);
+ r = r600_pipe_shader_create(ctx, shader);
+ if (r) {
+ return r;
+ }
+ r600_context_pipe_state_set(&rctx->ctx, &shader->rstate);
+
+ return 0;
+}
+
void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_resource *rbuffer;
- u32 vgt_dma_index_type, vgt_dma_swap_mode, vgt_draw_initiator, mask;
struct r600_draw rdraw;
- struct r600_drawl draw = {};
- unsigned prim;
+ struct r600_drawl draw;
+ unsigned prim, mask;
- r600_flush_depth_textures(rctx);
- u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info, NULL, NULL);
+ if (!rctx->blit) {
+ if (rctx->have_depth_fb || rctx->have_depth_texture)
+ r600_flush_depth_textures(rctx);
+ }
+ u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info);
r600_vertex_buffer_update(rctx);
draw.info = *info;
draw.ctx = ctx;
+ draw.index_buffer = NULL;
if (info->indexed && rctx->index_buffer.buffer) {
draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size;
pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer);
@@ -549,57 +588,29 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
r600_upload_index_buffer(rctx, &draw);
}
} else {
+ draw.index_size = 0;
+ draw.index_buffer_offset = 0;
draw.info.index_bias = info->start;
}
- vgt_dma_swap_mode = 0;
- switch (draw.index_size) {
- case 2:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 0;
- if (R600_BIG_ENDIAN) {
- vgt_dma_swap_mode = ENDIAN_8IN16;
- }
- break;
- case 4:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 1;
- if (R600_BIG_ENDIAN) {
- vgt_dma_swap_mode = ENDIAN_8IN32;
- }
- break;
- case 0:
- vgt_draw_initiator = 2;
- vgt_dma_index_type = 0;
- break;
- default:
- R600_ERR("unsupported index size %d\n", draw.index_size);
- return;
- }
if (r600_conv_pipe_prim(draw.info.mode, &prim))
return;
- if (unlikely(rctx->ps_shader == NULL)) {
- R600_ERR("missing vertex shader\n");
- return;
- }
- if (unlikely(rctx->vs_shader == NULL)) {
- R600_ERR("missing vertex shader\n");
- return;
- }
- /* there should be enough input */
- if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) {
- R600_ERR("%d resources provided, expecting %d\n",
- rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource);
- return;
- }
+
+ if (rctx->vs_shader->shader.clamp_color != rctx->clamp_vertex_color)
+ r600_shader_rebuild(ctx, rctx->vs_shader);
+
+ if ((rctx->ps_shader->shader.clamp_color != rctx->clamp_fragment_color) ||
+ ((rctx->family >= CHIP_CEDAR) && rctx->ps_shader->shader.fs_write_all &&
+ (rctx->ps_shader->shader.nr_cbufs != rctx->nr_cbufs)))
+ r600_shader_rebuild(ctx, rctx->ps_shader);
+
+ if (rctx->spi_dirty)
+ r600_spi_update(rctx);
if (rctx->alpha_ref_dirty)
r600_update_alpha_ref(rctx);
- mask = 0;
- for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
- mask |= (0xF << (i * 4));
- }
+ mask = (1ULL << ((unsigned)rctx->framebuffer.nr_cbufs * 4)) - 1;
if (rctx->vgt.id != R600_PIPE_STATE_VGT) {
rctx->vgt.id = R600_PIPE_STATE_VGT;
@@ -633,8 +644,10 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
rdraw.vgt_num_indices = draw.info.count;
rdraw.vgt_num_instances = draw.info.instance_count;
- rdraw.vgt_index_type = vgt_dma_index_type | (vgt_dma_swap_mode << 2);
- rdraw.vgt_draw_initiator = vgt_draw_initiator;
+ rdraw.vgt_index_type = ((draw.index_size == 4) ? 1 : 0);
+ if (R600_BIG_ENDIAN)
+ rdraw.vgt_index_type |= (draw.index_size >> 1) << 2;
+ rdraw.vgt_draw_initiator = draw.index_size ? 0 : 2;
rdraw.indices = NULL;
if (draw.index_buffer) {
rbuffer = (struct r600_resource*)draw.index_buffer;
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index acd41a21214..8711dbf1720 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -284,7 +284,7 @@ static inline uint32_t r600_translate_dbformat(enum pipe_format format)
static inline uint32_t r600_translate_colorswap(enum pipe_format format)
{
switch (format) {
- /* 8-bit buffers. */
+ /* 8-bit buffers. */
case PIPE_FORMAT_A8_UNORM:
return V_0280A0_SWAP_ALT_REV;
case PIPE_FORMAT_I8_UNORM:
@@ -297,7 +297,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_L4A4_UNORM:
return V_0280A0_SWAP_ALT;
- /* 16-bit buffers. */
+ /* 16-bit buffers. */
case PIPE_FORMAT_B5G6R5_UNORM:
return V_0280A0_SWAP_STD_REV;
@@ -320,9 +320,10 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_R16_UNORM:
case PIPE_FORMAT_R16_SNORM:
+ case PIPE_FORMAT_R16_FLOAT:
return V_0280A0_SWAP_STD;
- /* 32-bit buffers. */
+ /* 32-bit buffers. */
case PIPE_FORMAT_A8B8G8R8_SRGB:
return V_0280A0_SWAP_STD_REV;
@@ -368,13 +369,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_R32_FLOAT:
return V_0280A0_SWAP_STD;
- /* 64-bit buffers. */
+ /* 64-bit buffers. */
case PIPE_FORMAT_R32G32_FLOAT:
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
case PIPE_FORMAT_R16G16B16A16_FLOAT:
- /* 128-bit buffers. */
+ /* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32A32_FLOAT:
case PIPE_FORMAT_R32G32B32A32_SNORM:
case PIPE_FORMAT_R32G32B32A32_UNORM:
@@ -392,7 +393,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_L4A4_UNORM:
return V_0280A0_COLOR_4_4;
- /* 8-bit buffers. */
+ /* 8-bit buffers. */
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8_UNORM:
@@ -401,7 +402,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R8_SNORM:
return V_0280A0_COLOR_8;
- /* 16-bit buffers. */
+ /* 16-bit buffers. */
case PIPE_FORMAT_B5G6R5_UNORM:
return V_0280A0_COLOR_5_6_5;
@@ -425,7 +426,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R16_SNORM:
return V_0280A0_COLOR_16;
- /* 32-bit buffers. */
+ case PIPE_FORMAT_R16_FLOAT:
+ return V_0280A0_COLOR_16_FLOAT;
+
+ /* 32-bit buffers. */
case PIPE_FORMAT_A8B8G8R8_SRGB:
case PIPE_FORMAT_A8B8G8R8_UNORM:
case PIPE_FORMAT_A8R8G8B8_UNORM:
@@ -468,7 +472,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R11G11B10_FLOAT:
return V_0280A0_COLOR_10_11_11_FLOAT;
- /* 64-bit buffers. */
+ /* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16_USCALED:
case PIPE_FORMAT_R16G16B16A16_USCALED:
case PIPE_FORMAT_R16G16B16_SSCALED:
@@ -488,20 +492,21 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R32G32_SSCALED:
return V_0280A0_COLOR_32_32;
- /* 128-bit buffers. */
+ /* 96-bit buffers. */
case PIPE_FORMAT_R32G32B32_FLOAT:
return V_0280A0_COLOR_32_32_32_FLOAT;
+
+ /* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32A32_FLOAT:
return V_0280A0_COLOR_32_32_32_32_FLOAT;
case PIPE_FORMAT_R32G32B32A32_SNORM:
case PIPE_FORMAT_R32G32B32A32_UNORM:
return V_0280A0_COLOR_32_32_32_32;
- /* YUV buffers. */
+ /* YUV buffers. */
case PIPE_FORMAT_UYVY:
case PIPE_FORMAT_YUYV:
default:
- /* R600_ERR("unsupported color format %d %s\n", format, util_format_name(format)); */
return ~0; /* Unsupported. */
}
}
@@ -513,11 +518,11 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
case V_0280A0_COLOR_4_4:
return(ENDIAN_NONE);
- /* 8-bit buffers. */
+ /* 8-bit buffers. */
case V_0280A0_COLOR_8:
return(ENDIAN_NONE);
- /* 16-bit buffers. */
+ /* 16-bit buffers. */
case V_0280A0_COLOR_5_6_5:
case V_0280A0_COLOR_1_5_5_5:
case V_0280A0_COLOR_4_4_4_4:
@@ -525,7 +530,7 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
case V_0280A0_COLOR_8_8:
return(ENDIAN_8IN16);
- /* 32-bit buffers. */
+ /* 32-bit buffers. */
case V_0280A0_COLOR_8_8_8_8:
case V_0280A0_COLOR_2_10_10_10:
case V_0280A0_COLOR_8_24:
@@ -535,7 +540,7 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
case V_0280A0_COLOR_16_16:
return(ENDIAN_8IN32);
- /* 64-bit buffers. */
+ /* 64-bit buffers. */
case V_0280A0_COLOR_16_16_16_16:
case V_0280A0_COLOR_16_16_16_16_FLOAT:
return(ENDIAN_8IN16);
@@ -544,7 +549,7 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
case V_0280A0_COLOR_32_32:
return(ENDIAN_8IN32);
- /* 128-bit buffers. */
+ /* 128-bit buffers. */
case V_0280A0_COLOR_32_32_32_FLOAT:
case V_0280A0_COLOR_32_32_32_32_FLOAT:
case V_0280A0_COLOR_32_32_32_32:
@@ -565,7 +570,7 @@ static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *scree
static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format)
{
return r600_translate_colorformat(format) != ~0 &&
- r600_translate_colorswap(format) != ~0;
+ r600_translate_colorswap(format) != ~0;
}
static INLINE boolean r600_is_zs_format_supported(enum pipe_format format)
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 77cdd8dc33d..854761d17cb 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -243,10 +243,11 @@ static void r600_setup_miptree(struct pipe_screen *screen,
struct radeon *radeon = (struct radeon *)screen->winsys;
enum chip_class chipc = r600_get_family_class(radeon);
unsigned size, layer_size, i, offset;
- unsigned nblocksx, nblocksy;
+ unsigned nblocksx, nblocksy, extra_size = 0;
for (i = 0, offset = 0; i <= ptex->last_level; i++) {
unsigned blocksize = util_format_get_blocksize(ptex->format);
+ unsigned base_align = r600_get_base_alignment(screen, ptex->format, array_mode);
r600_texture_set_array_mode(screen, rtex, i, array_mode);
@@ -265,9 +266,13 @@ static void r600_setup_miptree(struct pipe_screen *screen,
else
size = layer_size * ptex->array_size;
+ /* evergreen stores depth and stencil separately */
+ if ((chipc >= EVERGREEN) && util_format_is_depth_or_stencil(ptex->format))
+ extra_size = align(extra_size + (nblocksx * nblocksy * 1), base_align);
+
/* align base image and start of miptree */
if ((i == 0) || (i == 1))
- offset = align(offset, r600_get_base_alignment(screen, ptex->format, array_mode));
+ offset = align(offset, base_align);
rtex->offset[i] = offset;
rtex->layer_size[i] = layer_size;
rtex->pitch_in_blocks[i] = nblocksx; /* CB talks in elements */
@@ -275,7 +280,7 @@ static void r600_setup_miptree(struct pipe_screen *screen,
offset += size;
}
- rtex->size = offset;
+ rtex->size = offset + extra_size;
}
/* Figure out whether u_blitter will fallback to a transfer operation.
@@ -1091,8 +1096,9 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
goto out_word4;
}
}
-
+ goto out_unknown;
}
+
out_word4:
if (word4_p)
*word4_p = word4;
diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c
index 7482d15e12f..307fd57e21a 100644
--- a/src/gallium/drivers/r600/r600_translate.c
+++ b/src/gallium/drivers/r600/r600_translate.c
@@ -48,6 +48,7 @@ void r600_translate_index_buffer(struct r600_pipe_context *r600,
&r600->context, *index_buffer, 0, *start, count, ptr);
pipe_resource_reference(index_buffer, out_buffer);
+ pipe_resource_reference(&out_buffer, NULL);
*index_size = 2;
*start = out_offset / 2;
break;
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 9281b08bd82..f6eec24cc05 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -2556,6 +2556,9 @@
#define S_009508_DISABLE_CUBE_WRAP(x) (((x) & 0x1) << 0)
#define G_009508_DISABLE_CUBE_WRAP(x) (((x) >> 0) & 0x1)
#define C_009508_DISABLE_CUBE_WRAP 0xFFFFFFFE
+#define S_009508_DISABLE_CUBE_ANISO(x) (((x) & 0x1) << 1)
+#define G_009508_DISABLE_CUBE_ANISO(x) (((x) >> 1) & 0x1)
+#define C_009508_DISABLE_CUBE_ANISO (~(1 << 1))
#define S_009508_SYNC_GRADIENT(x) (((x) & 0x1) << 24)
#define G_009508_SYNC_GRADIENT(x) (((x) >> 24) & 0x1)
#define C_009508_SYNC_GRADIENT 0xFEFFFFFF
@@ -3465,9 +3468,14 @@
#define SQ_TEX_INST_LD 0x03
#define SQ_TEX_INST_GET_GRADIENTS_H 0x7
#define SQ_TEX_INST_GET_GRADIENTS_V 0x8
+#define SQ_TEX_INST_SET_GRADIENTS_H 0xB
+#define SQ_TEX_INST_SET_GRADIENTS_V 0xC
#define SQ_TEX_INST_SAMPLE 0x10
#define SQ_TEX_INST_SAMPLE_L 0x11
+#define SQ_TEX_INST_SAMPLE_G 0x14
#define SQ_TEX_INST_SAMPLE_C 0x18
+#define SQ_TEX_INST_SAMPLE_C_L 0x19
+#define SQ_TEX_INST_SAMPLE_C_G 0x1C
#endif
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index dbbc249258d..cfb1b9d8d0d 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -207,6 +207,14 @@ void svga_context_flush( struct svga_context *svga,
svga->curr.nr_fbs = 0;
+ /* Flush the upload managers to ensure recycling of upload buffers
+ * without throttling. This should really be conditioned on
+ * pipe_buffer_map_range not supporting PIPE_TRANSFER_UNSYNCHRONIZED.
+ */
+
+ u_upload_flush(svga->upload_vb);
+ u_upload_flush(svga->upload_ib);
+
/* Ensure that texture dma uploads are processed
* before submitting commands.
*/
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index eca529d262e..34b9e85c1a3 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -372,9 +372,6 @@ struct svga_context
/** List of buffers with queued transfers */
struct list_head dirty_buffers;
-
- /** Was the previous draw done with the SW path? */
- boolean prev_draw_swtnl;
};
/* A flag for each state_tracker state object:
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index d8af615ede1..aa096692888 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -145,7 +145,7 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
unsigned i;
/* Unmap upload manager vertex buffers */
- u_upload_flush(svga->upload_vb);
+ u_upload_unmap(svga->upload_vb);
for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]);
@@ -156,7 +156,7 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
}
/* Unmap upload manager index buffers */
- u_upload_flush(svga->upload_ib);
+ u_upload_unmap(svga->upload_ib);
for (i = 0; i < hwtnl->cmd.prim_count; i++) {
if (hwtnl->cmd.prim_ib[i]) {
@@ -242,6 +242,11 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
}
+void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl,
+ int index_bias)
+{
+ hwtnl->index_bias = index_bias;
+}
@@ -265,15 +270,16 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
unsigned size = vb ? vb->width0 : 0;
unsigned offset = hwtnl->cmd.vdecl[i].array.offset;
unsigned stride = hwtnl->cmd.vdecl[i].array.stride;
- unsigned index_bias = range->indexBias;
+ int index_bias = (int) range->indexBias + hwtnl->index_bias;
unsigned width;
assert(vb);
assert(size);
assert(offset < size);
- assert(index_bias >= 0);
assert(min_index <= max_index);
- assert(offset + index_bias*stride < size);
+ if (index_bias >= 0) {
+ assert(offset + index_bias*stride < size);
+ }
if (min_index != ~0) {
assert(offset + (index_bias + min_index) * stride < size);
}
@@ -394,6 +400,7 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
+ hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias;
pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
hwtnl->cmd.prim_count++;
diff --git a/src/gallium/drivers/svga/svga_draw.h b/src/gallium/drivers/svga/svga_draw.h
index a2403d802be..1dac17421e1 100644
--- a/src/gallium/drivers/svga/svga_draw.h
+++ b/src/gallium/drivers/svga/svga_draw.h
@@ -79,5 +79,8 @@ svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl,
enum pipe_error
svga_hwtnl_flush( struct svga_hwtnl *hwtnl );
+void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl,
+ int index_bias);
+
#endif /* SVGA_DRAW_H_ */
diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h
index ca658ac6745..8126f7ee23c 100644
--- a/src/gallium/drivers/svga/svga_draw_private.h
+++ b/src/gallium/drivers/svga/svga_draw_private.h
@@ -116,6 +116,13 @@ struct draw_cmd {
struct svga_hwtnl {
struct svga_context *svga;
struct u_upload_mgr *upload_ib;
+
+ /* Additional negative index bias due to partial buffer uploads
+ * This is compensated for in the offset associated with all
+ * vertex buffers.
+ */
+
+ int index_bias;
/* Flatshade information:
*/
diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c
index 2093bcae101..d53edcb23c5 100644
--- a/src/gallium/drivers/svga/svga_pipe_draw.c
+++ b/src/gallium/drivers/svga/svga_pipe_draw.c
@@ -25,6 +25,7 @@
#include "svga_cmd.h"
+#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_prim.h"
#include "util/u_time.h"
@@ -37,6 +38,178 @@
#include "svga_state.h"
#include "svga_swtnl.h"
#include "svga_debug.h"
+#include "svga_resource_buffer.h"
+#include "util/u_upload_mgr.h"
+
+/**
+ * Determine the ranges to upload for the user-buffers referenced
+ * by the next draw command.
+ *
+ * TODO: It might be beneficial to support multiple ranges. In that case,
+ * the struct svga_buffer::uploaded member should be made an array or a
+ * list, since we need to account for the possibility that different ranges
+ * may be uploaded to different hardware buffers chosen by the utility
+ * upload manager.
+ */
+
+static void
+svga_user_buffer_range(struct svga_context *svga,
+ unsigned start,
+ unsigned count,
+ unsigned instance_count)
+{
+ const struct pipe_vertex_element *ve = svga->curr.velems->velem;
+ int i;
+
+ /*
+ * Release old uploaded range (if not done already) and
+ * initialize new ranges.
+ */
+
+ for (i=0; i < svga->curr.velems->count; i++) {
+ struct pipe_vertex_buffer *vb =
+ &svga->curr.vb[ve[i].vertex_buffer_index];
+
+ if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
+ struct svga_buffer *buffer = svga_buffer(vb->buffer);
+
+ pipe_resource_reference(&buffer->uploaded.buffer, NULL);
+ buffer->uploaded.start = ~0;
+ buffer->uploaded.end = 0;
+ }
+ }
+
+ for (i=0; i < svga->curr.velems->count; i++) {
+ struct pipe_vertex_buffer *vb =
+ &svga->curr.vb[ve[i].vertex_buffer_index];
+
+ if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
+ struct svga_buffer *buffer = svga_buffer(vb->buffer);
+ unsigned first, size;
+ unsigned instance_div = ve[i].instance_divisor;
+ unsigned elemSize = util_format_get_blocksize(ve[i].src_format);
+
+ svga->dirty |= SVGA_NEW_VBUFFER;
+
+ if (instance_div) {
+ first = ve[i].src_offset;
+ count = (instance_count + instance_div - 1) / instance_div;
+ size = vb->stride * (count - 1) + elemSize;
+ } else if (vb->stride) {
+ first = vb->stride * start + ve[i].src_offset;
+ size = vb->stride * (count - 1) + elemSize;
+ } else {
+ /* Only a single vertex!
+ * Upload with the largest vertex size the hw supports,
+ * if possible.
+ */
+ first = ve[i].src_offset;
+ size = MIN2(16, vb->buffer->width0);
+ }
+
+ buffer->uploaded.start = MIN2(buffer->uploaded.start, first);
+ buffer->uploaded.end = MAX2(buffer->uploaded.end, first + size);
+ }
+ }
+}
+
+/**
+ * svga_upload_user_buffers - upload parts of user buffers
+ *
+ * This function streams a part of a user buffer to hw and fills
+ * svga_buffer::uploaded with information on the upload.
+ */
+
+static int
+svga_upload_user_buffers(struct svga_context *svga,
+ unsigned start,
+ unsigned count,
+ unsigned instance_count)
+{
+ const struct pipe_vertex_element *ve = svga->curr.velems->velem;
+ unsigned i;
+ int ret;
+
+ svga_user_buffer_range(svga, start, count, instance_count);
+
+ for (i=0; i < svga->curr.velems->count; i++) {
+ struct pipe_vertex_buffer *vb =
+ &svga->curr.vb[ve[i].vertex_buffer_index];
+
+ if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
+ struct svga_buffer *buffer = svga_buffer(vb->buffer);
+ boolean flushed;
+
+ /*
+ * Check if already uploaded. Otherwise go ahead and upload.
+ */
+
+ if (buffer->uploaded.buffer)
+ continue;
+
+ ret = u_upload_buffer( svga->upload_vb,
+ 0,
+ buffer->uploaded.start,
+ buffer->uploaded.end - buffer->uploaded.start,
+ &buffer->b.b,
+ &buffer->uploaded.offset,
+ &buffer->uploaded.buffer,
+ &flushed);
+
+ if (ret)
+ return ret;
+
+ if (0)
+ debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sofs %d"
+ " sz %d\n",
+ __FUNCTION__,
+ i,
+ buffer,
+ buffer->uploaded.buffer,
+ buffer->uploaded.offset,
+ buffer->uploaded.start,
+ buffer->uploaded.end - buffer->uploaded.start);
+
+ vb->buffer_offset = buffer->uploaded.offset;
+ }
+ }
+
+ return PIPE_OK;
+}
+
+/**
+ * svga_release_user_upl_buffers - release uploaded parts of user buffers
+ *
+ * This function releases the hw copy of the uploaded fraction of the
+ * user-buffer. It's important to do this as soon as all draw calls
+ * affecting the uploaded fraction are issued, as this allows for
+ * efficient reuse of the hardware surface backing the uploaded fraction.
+ *
+ * svga_buffer::source_offset is set to 0, and svga_buffer::uploaded::buffer
+ * is set to 0.
+ */
+
+static void
+svga_release_user_upl_buffers(struct svga_context *svga)
+{
+ unsigned i;
+ unsigned nr;
+
+ nr = svga->curr.num_vertex_buffers;
+
+ for (i = 0; i < nr; ++i) {
+ struct pipe_vertex_buffer *vb = &svga->curr.vb[i];
+
+ if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
+ struct svga_buffer *buffer = svga_buffer(vb->buffer);
+
+ buffer->uploaded.start = ~0;
+ buffer->uploaded.end = 0;
+ if (buffer->uploaded.buffer)
+ pipe_resource_reference(&buffer->uploaded.buffer, NULL);
+ }
+ }
+}
@@ -50,6 +223,7 @@ retry_draw_range_elements( struct svga_context *svga,
unsigned prim,
unsigned start,
unsigned count,
+ unsigned instance_count,
boolean do_retry )
{
enum pipe_error ret = 0;
@@ -61,6 +235,10 @@ retry_draw_range_elements( struct svga_context *svga,
svga->curr.rast->templ.flatshade,
svga->curr.rast->templ.flatshade_first );
+ ret = svga_upload_user_buffers( svga, min_index + index_bias,
+ max_index - min_index + 1, instance_count );
+ if (ret != PIPE_OK)
+ goto retry;
ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
if (ret)
@@ -84,7 +262,7 @@ retry:
index_buffer, index_size, index_bias,
min_index, max_index,
prim, start, count,
- FALSE );
+ instance_count, FALSE );
}
return ret;
@@ -96,6 +274,7 @@ retry_draw_arrays( struct svga_context *svga,
unsigned prim,
unsigned start,
unsigned count,
+ unsigned instance_count,
boolean do_retry )
{
enum pipe_error ret;
@@ -107,6 +286,11 @@ retry_draw_arrays( struct svga_context *svga,
svga->curr.rast->templ.flatshade,
svga->curr.rast->templ.flatshade_first );
+ ret = svga_upload_user_buffers( svga, start, count, instance_count );
+
+ if (ret != PIPE_OK)
+ goto retry;
+
ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
if (ret)
goto retry;
@@ -127,6 +311,7 @@ retry:
prim,
start,
count,
+ instance_count,
FALSE );
}
@@ -141,18 +326,11 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
unsigned reduced_prim = u_reduced_prim( info->mode );
unsigned count = info->count;
enum pipe_error ret = 0;
+ boolean needed_swtnl;
if (!u_trim_pipe_prim( info->mode, &count ))
return;
- if (svga->state.sw.need_swtnl != svga->prev_draw_swtnl) {
- /* We're switching between SW and HW drawing. Do a flush to avoid
- * mixing HW and SW rendering with the same vertex buffer.
- */
- pipe->flush(pipe, NULL);
- svga->prev_draw_swtnl = svga->state.sw.need_swtnl;
- }
-
/*
* Mark currently bound target surfaces as dirty
* doesn't really matter if it is done before drawing.
@@ -167,6 +345,8 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE;
}
+ needed_swtnl = svga->state.sw.need_swtnl;
+
svga_update_state_retry( svga, SVGA_STATE_NEED_SWTNL );
#ifdef DEBUG
@@ -176,6 +356,20 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
#endif
if (svga->state.sw.need_swtnl) {
+ if (!needed_swtnl) {
+ /*
+ * We're switching from HW to SW TNL. SW TNL will require mapping all
+ * currently bound vertex buffers, some of which may already be
+ * referenced in the current command buffer as result of previous HW
+ * TNL. So flush now, to prevent the context to flush while a referred
+ * vertex buffer is mapped.
+ */
+
+ svga_context_flush(svga, NULL);
+ }
+
+ /* Avoid leaking the previous hwtnl bias to swtnl */
+ svga_hwtnl_set_index_bias( svga->hwtnl, 0 );
ret = svga_swtnl_draw_vbo( svga, info );
}
else {
@@ -194,6 +388,7 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
info->mode,
info->start + offset,
info->count,
+ info->instance_count,
TRUE );
}
else {
@@ -201,10 +396,13 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
info->mode,
info->start,
info->count,
+ info->instance_count,
TRUE );
}
}
+ svga_release_user_upl_buffers( svga );
+
if (SVGA_DEBUG & DEBUG_FLUSH) {
svga_hwtnl_flush_retry( svga );
svga_context_flush(svga, NULL);
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h
index 95032213fa5..ca8c8d1f5ea 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.h
+++ b/src/gallium/drivers/svga/svga_resource_buffer.h
@@ -129,6 +129,12 @@ struct svga_buffer
* is the relative offset within that buffer.
*/
unsigned offset;
+
+ /**
+ * Range of user buffer that is uploaded in @buffer at @offset.
+ */
+ unsigned start;
+ unsigned end;
} uploaded;
/**
@@ -193,7 +199,11 @@ svga_buffer(struct pipe_resource *buffer)
static INLINE boolean
svga_buffer_is_user_buffer( struct pipe_resource *buffer )
{
- return svga_buffer(buffer)->user;
+ if (buffer) {
+ return svga_buffer(buffer)->user;
+ } else {
+ return FALSE;
+ }
}
diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
index 923958674b4..a657a8bc224 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@@ -651,8 +651,6 @@ svga_redefine_user_buffer(struct pipe_context *pipe,
unsigned offset,
unsigned size)
{
- struct svga_screen *ss = svga_screen(pipe->screen);
- struct svga_context *svga = svga_context(pipe);
struct svga_buffer *sbuf = svga_buffer(resource);
assert(sbuf->user);
@@ -661,19 +659,8 @@ svga_redefine_user_buffer(struct pipe_context *pipe,
assert(!sbuf->hwbuf);
/*
- * Release any uploaded user buffer.
- *
- * TODO: As an optimization, we could try to update the uploaded buffer
- * instead.
+ * We always treat the contents of user-buffers as volatile,
+ * so no particular action needed here.
*/
- pipe_resource_reference(&sbuf->uploaded.buffer, NULL);
-
- pipe_mutex_lock(ss->swc_mutex);
-
- sbuf->key.size.width = sbuf->b.b.width0 = offset + size;
-
- pipe_mutex_unlock(ss->swc_mutex);
-
- svga->dirty |= SVGA_NEW_VBUFFER | SVGA_NEW_VELEMENT;
}
diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c
index 7c393a1da8d..47eab1a9739 100644
--- a/src/gallium/drivers/svga/svga_state_vdecl.c
+++ b/src/gallium/drivers/svga/svga_state_vdecl.c
@@ -38,57 +38,6 @@
#include "svga_hw_reg.h"
-static int
-upload_user_buffers( struct svga_context *svga )
-{
- enum pipe_error ret = PIPE_OK;
- int i;
- int nr;
-
- if (0)
- debug_printf("%s: %d\n", __FUNCTION__, svga->curr.num_vertex_buffers);
-
- nr = svga->curr.num_vertex_buffers;
-
- for (i = 0; i < nr; i++)
- {
- if (svga_buffer_is_user_buffer(svga->curr.vb[i].buffer))
- {
- struct svga_buffer *buffer = svga_buffer(svga->curr.vb[i].buffer);
-
- if (!buffer->uploaded.buffer) {
- boolean flushed;
- ret = u_upload_buffer( svga->upload_vb,
- 0, 0,
- buffer->b.b.width0,
- &buffer->b.b,
- &buffer->uploaded.offset,
- &buffer->uploaded.buffer,
- &flushed);
- if (ret)
- return ret;
-
- if (0)
- debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n",
- __FUNCTION__,
- i,
- buffer,
- buffer->uploaded.buffer,
- buffer->uploaded.offset,
- buffer->b.b.width0);
- }
-
- svga->curr.vb[i].buffer_offset = buffer->uploaded.offset;
- }
- }
-
- if (0)
- debug_printf("%s: DONE\n", __FUNCTION__);
-
- return ret;
-}
-
-
/***********************************************************************
*/
@@ -99,6 +48,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
const struct pipe_vertex_element *ve = svga->curr.velems->velem;
SVGA3dVertexDecl decl;
unsigned i;
+ unsigned neg_bias = 0;
assert(svga->curr.velems->count >=
svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);
@@ -106,12 +56,50 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
svga_hwtnl_reset_vdecl( svga->hwtnl,
svga->curr.velems->count );
+ /**
+ * We can't set the VDECL offset to something negative, so we
+ * must calculate a common negative additional index bias, and modify
+ * the VDECL offsets accordingly so they *all* end up positive.
+ *
+ * Note that the exact value of the negative index bias is not that
+ * important, since we compensate for it when we calculate the vertex
+ * buffer offset below. The important thing is that all vertex buffer
+ * offsets remain positive.
+ *
+ * Note that we use a negative bias variable in order to make the
+ * rounding maths more easy to follow, and to avoid int / unsigned
+ * confusion.
+ */
+
for (i = 0; i < svga->curr.velems->count; i++) {
- const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index];
+ const struct pipe_vertex_buffer *vb =
+ &svga->curr.vb[ve[i].vertex_buffer_index];
+ struct svga_buffer *buffer;
+ unsigned int offset = vb->buffer_offset + ve[i].src_offset;
+ unsigned tmp_neg_bias = 0;
+
+ if (!vb->buffer)
+ continue;
+
+ buffer = svga_buffer(vb->buffer);
+ if (buffer->uploaded.start > offset) {
+ tmp_neg_bias = buffer->uploaded.start - offset;
+ if (vb->stride)
+ tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride;
+ neg_bias = MAX2(neg_bias, tmp_neg_bias);
+ }
+ }
+
+ for (i = 0; i < svga->curr.velems->count; i++) {
+ const struct pipe_vertex_buffer *vb =
+ &svga->curr.vb[ve[i].vertex_buffer_index];
unsigned usage, index;
- struct svga_buffer *buffer = svga_buffer(vb->buffer);
+ struct svga_buffer *buffer;
+ if (!vb->buffer)
+ continue;
+ buffer= svga_buffer(vb->buffer);
svga_generate_vdecl_semantics( i, &usage, &index );
/* SVGA_NEW_VELEMENT
@@ -121,8 +109,16 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
decl.identity.usage = usage;
decl.identity.usageIndex = index;
decl.array.stride = vb->stride;
- decl.array.offset = (vb->buffer_offset +
- ve[i].src_offset);
+
+ /* Compensate for partially uploaded vbo, and
+ * for the negative index bias.
+ */
+ decl.array.offset = (vb->buffer_offset
+ + ve[i].src_offset
+ + neg_bias * vb->stride
+ - buffer->uploaded.start);
+
+ assert(decl.array.offset >= 0);
svga_hwtnl_vdecl( svga->hwtnl,
i,
@@ -131,6 +127,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
vb->buffer );
}
+ svga_hwtnl_set_index_bias( svga->hwtnl, -neg_bias );
return 0;
}
@@ -138,23 +135,11 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
static int emit_hw_vdecl( struct svga_context *svga,
unsigned dirty )
{
- int ret = 0;
-
/* SVGA_NEW_NEED_SWTNL
*/
if (svga->state.sw.need_swtnl)
return 0; /* Do not emit during swtnl */
- /* If we get to here, we know that we're going to draw. Upload
- * userbuffers now and try to combine multiple userbuffers from
- * multiple draw calls into a single host buffer for performance.
- */
- if (svga->curr.any_user_vertex_buffers) {
- ret = upload_user_buffers( svga );
- if (ret)
- return ret;
- }
-
return emit_hw_vs_vdecl( svga, dirty );
}