summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c104
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.h28
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c70
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.h5
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c8
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_iterate.c6
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_iterate.h5
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c16
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.h7
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ppc.c7
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c12
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c21
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.h8
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c7
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c139
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_transform.c25
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_transform.h4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c31
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h4
-rw-r--r--src/gallium/auxiliary/util/Makefile3
-rw-r--r--src/gallium/auxiliary/util/SConscript3
-rw-r--r--src/gallium/auxiliary/util/u_blit.c7
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c718
-rw-r--r--src/gallium/auxiliary/util/u_blitter.h230
-rw-r--r--src/gallium/auxiliary/util/u_dl.c79
-rw-r--r--src/gallium/auxiliary/util/u_dl.h61
-rw-r--r--src/gallium/auxiliary/util/u_format_access.py4
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c57
-rw-r--r--src/gallium/auxiliary/util/u_simple_shaders.c70
-rw-r--r--src/gallium/auxiliary/util/u_simple_shaders.h13
-rw-r--r--src/gallium/auxiliary/util/u_texture.c102
-rw-r--r--src/gallium/auxiliary/util/u_texture.h54
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_arit.c11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_debug.h71
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c22
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c117
-rw-r--r--src/gallium/drivers/nouveau/nouveau_stateobj.h49
-rw-r--r--src/gallium/drivers/nv04/nv04_surface_2d.c9
-rw-r--r--src/gallium/drivers/nv30/nv30_context.c3
-rw-r--r--src/gallium/drivers/nv30/nv30_context.h1
-rw-r--r--src/gallium/drivers/nv30/nv30_state_emit.c10
-rw-r--r--src/gallium/drivers/nv40/nv40_context.c3
-rw-r--r--src/gallium/drivers/nv40/nv40_context.h1
-rw-r--r--src/gallium/drivers/nv40/nv40_state_emit.c10
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h12
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c33
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c512
-rw-r--r--src/gallium/drivers/nv50/nv50_program.h2
-rw-r--r--src/gallium/drivers/nv50/nv50_query.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c21
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c64
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c52
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_tex.c91
-rw-r--r--src/gallium/drivers/nv50/nv50_texture.h1
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.c41
-rw-r--r--src/gallium/drivers/r300/Makefile2
-rw-r--r--src/gallium/drivers/r300/SConscript2
-rw-r--r--src/gallium/drivers/r300/r300_blit.c130
-rw-r--r--src/gallium/drivers/r300/r300_blit.h (renamed from src/gallium/drivers/r300/r300_clear.h)22
-rw-r--r--src/gallium/drivers/r300/r300_clear.c38
-rw-r--r--src/gallium/drivers/r300/r300_context.c9
-rw-r--r--src/gallium/drivers/r300/r300_context.h4
-rw-r--r--src/gallium/drivers/r300/r300_emit.c31
-rw-r--r--src/gallium/drivers/r300/r300_reg.h1
-rw-r--r--src/gallium/drivers/r300/r300_render.c8
-rw-r--r--src/gallium/drivers/r300/r300_screen.c8
-rw-r--r--src/gallium/drivers/r300/r300_state.c3
-rw-r--r--src/gallium/drivers/r300/r300_state_invariant.c19
-rw-r--r--src/gallium/include/pipe/p_defines.h3
-rw-r--r--src/gallium/include/pipe/p_shader_tokens.h38
-rw-r--r--src/gallium/winsys/drm/radeon/core/radeon_buffer.c6
-rw-r--r--src/gallium/winsys/drm/radeon/core/radeon_buffer.h1
-rw-r--r--src/gallium/winsys/drm/radeon/core/radeon_drm.c1
-rw-r--r--src/mesa/drivers/dri/r300/r300_blit.c18
-rw-r--r--src/mesa/drivers/dri/r300/r300_blit.h1
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c5
-rw-r--r--src/mesa/drivers/dri/r300/r300_tex.h2
-rw-r--r--src/mesa/drivers/dri/r300/r300_texcopy.c3
-rw-r--r--src/mesa/drivers/dri/r300/r300_texstate.c9
-rw-r--r--src/mesa/drivers/dri/r600/r600_cmdbuf.c15
-rw-r--r--src/mesa/drivers/dri/r600/r600_cmdbuf.h16
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.c16
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c123
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.h2
-rw-r--r--src/mesa/drivers/dri/r600/r700_chip.c34
-rw-r--r--src/mesa/drivers/dri/r600/r700_fragprog.c98
-rw-r--r--src/mesa/drivers/dri/r600/r700_fragprog.h2
-rw-r--r--src/mesa/drivers/dri/r600/r700_shaderinst.h7
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.h3
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_dma.c1
92 files changed, 3127 insertions, 604 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 4092f78f4a2..92903fe57f3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -942,3 +942,107 @@ tgsi_default_full_dst_register( void )
return full_dst_register;
}
+struct tgsi_property
+tgsi_default_property( void )
+{
+ struct tgsi_property property;
+
+ property.Type = TGSI_TOKEN_TYPE_PROPERTY;
+ property.NrTokens = 1;
+ property.PropertyName = TGSI_PROPERTY_GS_INPUT_PRIM;
+ property.Padding = 0;
+
+ return property;
+}
+
+struct tgsi_property
+tgsi_build_property(unsigned property_name,
+ struct tgsi_header *header)
+{
+ struct tgsi_property property;
+
+ property = tgsi_default_property();
+ property.PropertyName = property_name;
+
+ header_bodysize_grow( header );
+
+ return property;
+}
+
+
+struct tgsi_full_property
+tgsi_default_full_property( void )
+{
+ struct tgsi_full_property full_property;
+
+ full_property.Property = tgsi_default_property();
+ memset(full_property.u, 0,
+ sizeof(struct tgsi_property_data) * 8);
+
+ return full_property;
+}
+
+static void
+property_grow(
+ struct tgsi_property *property,
+ struct tgsi_header *header )
+{
+ assert( property->NrTokens < 0xFF );
+
+ property->NrTokens++;
+
+ header_bodysize_grow( header );
+}
+
+struct tgsi_property_data
+tgsi_build_property_data(
+ unsigned value,
+ struct tgsi_property *property,
+ struct tgsi_header *header )
+{
+ struct tgsi_property_data property_data;
+
+ property_data.Data = value;
+
+ property_grow( property, header );
+
+ return property_data;
+}
+
+unsigned
+tgsi_build_full_property(
+ const struct tgsi_full_property *full_prop,
+ struct tgsi_token *tokens,
+ struct tgsi_header *header,
+ unsigned maxsize )
+{
+ unsigned size = 0, i;
+ struct tgsi_property *property;
+
+ if( maxsize <= size )
+ return 0;
+ property = (struct tgsi_property *) &tokens[size];
+ size++;
+
+ *property = tgsi_build_property(
+ TGSI_PROPERTY_GS_INPUT_PRIM,
+ header );
+
+ assert( full_prop->Property.NrTokens <= 8 + 1 );
+
+ for( i = 0; i < full_prop->Property.NrTokens - 1; i++ ) {
+ struct tgsi_property_data *data;
+
+ if( maxsize <= size )
+ return 0;
+ data = (struct tgsi_property_data *) &tokens[size];
+ size++;
+
+ *data = tgsi_build_property_data(
+ full_prop->u[i].Data,
+ property,
+ header );
+ }
+
+ return size;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index ffea786770c..9de2757fe40 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -127,6 +127,34 @@ tgsi_build_full_immediate(
unsigned maxsize );
/*
+ * properties
+ */
+
+struct tgsi_property
+tgsi_default_property( void );
+
+struct tgsi_property
+tgsi_build_property(
+ unsigned property_name,
+ struct tgsi_header *header );
+
+struct tgsi_full_property
+tgsi_default_full_property( void );
+
+struct tgsi_property_data
+tgsi_build_property_data(
+ unsigned value,
+ struct tgsi_property *property,
+ struct tgsi_header *header );
+
+unsigned
+tgsi_build_full_property(
+ const struct tgsi_full_property *full_prop,
+ struct tgsi_token *tokens,
+ struct tgsi_header *header,
+ unsigned maxsize );
+
+/*
* instruction
*/
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index d09ab925656..ba1357697d1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -101,7 +101,8 @@ static const char *file_names[TGSI_FILE_COUNT] =
"ADDR",
"IMM",
"LOOP",
- "PRED"
+ "PRED",
+ "SV"
};
static const char *interpolate_names[] =
@@ -149,6 +150,27 @@ static const char *texture_names[] =
"SHADOWRECT"
};
+static const char *property_names[] =
+{
+ "GS_INPUT_PRIMITIVE",
+ "GS_OUTPUT_PRIMITIVE",
+ "GS_MAX_OUTPUT_VERTICES"
+};
+
+static const char *primitive_names[] =
+{
+ "POINTS",
+ "LINES",
+ "LINE_LOOP",
+ "LINE_STRIP",
+ "TRIANGLES",
+ "TRIANGLE_STRIP",
+ "TRIANGLE_FAN",
+ "QUADS",
+ "QUAD_STRIP",
+ "POLYGON"
+};
+
static void
_dump_register(
@@ -273,6 +295,50 @@ tgsi_dump_declaration(
}
static boolean
+iter_property(
+ struct tgsi_iterate_context *iter,
+ struct tgsi_full_property *prop )
+{
+ int i;
+ struct dump_ctx *ctx = (struct dump_ctx *)iter;
+
+ assert(Elements(property_names) == TGSI_PROPERTY_COUNT);
+
+ TXT( "PROPERTY " );
+ ENM(prop->Property.PropertyName, property_names);
+
+ if (prop->Property.NrTokens > 1)
+ TXT(" ");
+
+ for (i = 0; i < prop->Property.NrTokens - 1; ++i) {
+ switch (prop->Property.PropertyName) {
+ case TGSI_PROPERTY_GS_INPUT_PRIM:
+ case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+ ENM(prop->u[i].Data, primitive_names);
+ break;
+ default:
+ SID( prop->u[i].Data );
+ break;
+ }
+ if (i < prop->Property.NrTokens - 2)
+ TXT( ", " );
+ }
+ EOL();
+
+ return TRUE;
+}
+
+void tgsi_dump_property(
+ const struct tgsi_full_property *prop )
+{
+ struct dump_ctx ctx;
+
+ ctx.printf = dump_ctx_printf;
+
+ iter_property( &ctx.iter, (struct tgsi_full_property *)prop );
+}
+
+static boolean
iter_immediate(
struct tgsi_iterate_context *iter,
struct tgsi_full_immediate *imm )
@@ -492,6 +558,7 @@ tgsi_dump(
ctx.iter.iterate_instruction = iter_instruction;
ctx.iter.iterate_declaration = iter_declaration;
ctx.iter.iterate_immediate = iter_immediate;
+ ctx.iter.iterate_property = iter_property;
ctx.iter.epilog = NULL;
ctx.instno = 0;
@@ -546,6 +613,7 @@ tgsi_dump_str(
ctx.base.iter.iterate_instruction = iter_instruction;
ctx.base.iter.iterate_declaration = iter_declaration;
ctx.base.iter.iterate_immediate = iter_immediate;
+ ctx.base.iter.iterate_property = iter_property;
ctx.base.iter.epilog = NULL;
ctx.base.instno = 0;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h
index ad1e647ec90..4cd27317b36 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -49,6 +49,7 @@ tgsi_dump(
struct tgsi_full_immediate;
struct tgsi_full_instruction;
struct tgsi_full_declaration;
+struct tgsi_full_property;
void
tgsi_dump_immediate(
@@ -63,6 +64,10 @@ void
tgsi_dump_declaration(
const struct tgsi_full_declaration *decl );
+void
+tgsi_dump_property(
+ const struct tgsi_full_property *prop );
+
#if defined __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 22984c32320..717358620c5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -336,6 +336,9 @@ tgsi_exec_machine_bind_shader(
numInstructions++;
break;
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ break;
+
default:
assert( 0 );
}
@@ -1158,6 +1161,7 @@ fetch_src_file_channel(
break;
case TGSI_FILE_INPUT:
+ case TGSI_FILE_SYSTEM_VALUE:
chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
@@ -1302,6 +1306,7 @@ fetch_source(
*/
switch (reg->Register.File) {
case TGSI_FILE_INPUT:
+ case TGSI_FILE_SYSTEM_VALUE:
index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
@@ -1892,7 +1897,8 @@ exec_declaration(struct tgsi_exec_machine *mach,
const struct tgsi_full_declaration *decl)
{
if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
- if (decl->Declaration.File == TGSI_FILE_INPUT) {
+ if (decl->Declaration.File == TGSI_FILE_INPUT ||
+ decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
uint first, last, mask;
first = decl->Range.First;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/tgsi_iterate.c
index 7b384f5e12a..0ba5fe48419 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_iterate.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.c
@@ -66,6 +66,12 @@ tgsi_iterate_shader(
goto fail;
break;
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ if (ctx->iterate_property)
+ if (!ctx->iterate_property( ctx, &parse.FullToken.FullProperty ))
+ goto fail;
+ break;
+
default:
assert( 0 );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.h b/src/gallium/auxiliary/tgsi/tgsi_iterate.h
index ef5a33ebce9..8d67f22c429 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_iterate.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.h
@@ -57,6 +57,11 @@ struct tgsi_iterate_context
struct tgsi_full_immediate *imm );
boolean
+ (* iterate_property)(
+ struct tgsi_iterate_context *ctx,
+ struct tgsi_full_property *prop );
+
+ boolean
(* epilog)(
struct tgsi_iterate_context *ctx );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 8f2b6a307d3..fa65ecb9975 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -220,6 +220,22 @@ tgsi_parse_token(
break;
}
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ {
+ struct tgsi_full_property *prop = &ctx->FullToken.FullProperty;
+ uint prop_count;
+
+ memset(prop, 0, sizeof *prop);
+ copy_token(&prop->Property, &token);
+
+ prop_count = prop->Property.NrTokens - 1;
+ for (i = 0; i < prop_count; i++) {
+ next_token(ctx, &prop->u[i]);
+ }
+
+ break;
+ }
+
default:
assert( 0 );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index 3aa1979a63a..439a57269b7 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -67,6 +67,12 @@ struct tgsi_full_immediate
union tgsi_immediate_data u[4];
};
+struct tgsi_full_property
+{
+ struct tgsi_property Property;
+ struct tgsi_property_data u[8];
+};
+
#define TGSI_FULL_MAX_DST_REGISTERS 2
#define TGSI_FULL_MAX_SRC_REGISTERS 4 /* TXD has 4 */
@@ -86,6 +92,7 @@ union tgsi_full_token
struct tgsi_full_declaration FullDeclaration;
struct tgsi_full_immediate FullImmediate;
struct tgsi_full_instruction FullInstruction;
+ struct tgsi_full_property FullProperty;
};
struct tgsi_parse_context
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index da6ad6da04c..138d2d095bb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -293,6 +293,7 @@ emit_fetch(struct gen_context *gen,
case TGSI_SWIZZLE_W:
switch (reg->Register.File) {
case TGSI_FILE_INPUT:
+ case TGSI_FILE_SYSTEM_VALUE:
{
int offset = (reg->Register.Index * 4 + swizzle) * 16;
int offset_reg = emit_li_offset(gen, offset);
@@ -1173,7 +1174,8 @@ emit_declaration(
struct ppc_function *func,
struct tgsi_full_declaration *decl )
{
- if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+ if( decl->Declaration.File == TGSI_FILE_INPUT ||
+ decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE ) {
#if 0
unsigned first, last, mask;
unsigned i, j;
@@ -1339,6 +1341,9 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
}
break;
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ break;
+
default:
ok = 0;
assert( 0 );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index b5d1faa897a..c27579e7942 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -324,6 +324,17 @@ iter_immediate(
return TRUE;
}
+
+static boolean
+iter_property(
+ struct tgsi_iterate_context *iter,
+ struct tgsi_full_property *prop )
+{
+ /*struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;*/
+
+ return TRUE;
+}
+
static boolean
epilog(
struct tgsi_iterate_context *iter )
@@ -367,6 +378,7 @@ tgsi_sanity_check(
ctx.iter.iterate_instruction = iter_instruction;
ctx.iter.iterate_declaration = iter_declaration;
ctx.iter.iterate_immediate = iter_immediate;
+ ctx.iter.iterate_property = iter_property;
ctx.iter.epilog = epilog;
memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index a5d2db04ec1..5f5c95bfbdb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -97,7 +97,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *src =
&fullinst->Src[i];
- if (src->Register.File == TGSI_FILE_INPUT) {
+ if (src->Register.File == TGSI_FILE_INPUT ||
+ src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
const int ind = src->Register.Index;
if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) {
if (src->Register.SwizzleX == TGSI_SWIZZLE_X) {
@@ -128,7 +129,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->file_count[file]++;
info->file_max[file] = MAX2(info->file_max[file], (int)reg);
- if (file == TGSI_FILE_INPUT) {
+ if (file == TGSI_FILE_INPUT || file == TGSI_FILE_SYSTEM_VALUE) {
info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name;
info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index;
info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate;
@@ -160,6 +161,19 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->file_max[file] = MAX2(info->file_max[file], (int)reg);
}
break;
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ {
+ const struct tgsi_full_property *fullprop
+ = &parse.FullToken.FullProperty;
+
+ info->properties[info->num_properties].name =
+ fullprop->Property.PropertyName;
+ memcpy(info->properties[info->num_properties].data,
+ fullprop->u, 8 * sizeof(unsigned));;
+
+ ++info->num_properties;
+ }
+ break;
default:
assert( 0 );
@@ -212,6 +226,7 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
/* Do a whole bunch of checks for a simple move */
if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV ||
src->Register.File != TGSI_FILE_INPUT ||
+ src->Register.File != TGSI_FILE_SYSTEM_VALUE ||
dst->Register.File != TGSI_FILE_OUTPUT ||
src->Register.Index != dst->Register.Index ||
@@ -235,6 +250,8 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
/* fall-through */
case TGSI_TOKEN_TYPE_IMMEDIATE:
/* fall-through */
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ /* fall-through */
default:
; /* no-op */
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index 8a7ee0c7e4f..a1e8a4f6bb7 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -33,7 +33,6 @@
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
-
/**
* Shader summary info
*/
@@ -61,8 +60,13 @@ struct tgsi_shader_info
boolean uses_kill; /**< KIL or KILP instruction used? */
boolean uses_fogcoord; /**< fragment shader uses fog coord? */
boolean uses_frontfacing; /**< fragment shader uses front/back-face flag? */
-};
+ struct {
+ unsigned name;
+ unsigned data[8];
+ } properties[TGSI_PROPERTY_COUNT];
+ uint num_properties;
+};
extern void
tgsi_scan_shader(const struct tgsi_token *tokens,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 76051ea0d8e..d63c75dafb3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1288,6 +1288,7 @@ emit_fetch(
break;
case TGSI_FILE_INPUT:
+ case TGSI_FILE_SYSTEM_VALUE:
emit_inputf(
func,
xmm,
@@ -2633,7 +2634,8 @@ emit_declaration(
struct x86_function *func,
struct tgsi_full_declaration *decl )
{
- if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+ if( decl->Declaration.File == TGSI_FILE_INPUT ||
+ decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE ) {
unsigned first, last, mask;
unsigned i, j;
@@ -2952,6 +2954,9 @@ tgsi_emit_sse2(
num_immediates++;
}
break;
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ /* we just ignore them for now */
+ break;
default:
ok = 0;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index eb376fa9572..f000958bfc0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -27,6 +27,7 @@
#include "util/u_debug.h"
#include "util/u_memory.h"
+#include "pipe/p_defines.h"
#include "tgsi_text.h"
#include "tgsi_build.h"
#include "tgsi_info.h"
@@ -59,6 +60,21 @@ static boolean uprcase( char c )
return c;
}
+/*
+ * Ignore case of str1 and assume str2 is already uppercase.
+ * Return TRUE iff str1 and str2 are equal.
+ */
+static int
+streq_nocase_uprcase(const char *str1,
+ const char *str2)
+{
+ while (*str1 && uprcase(*str1) == *str2) {
+ str1++;
+ str2++;
+ }
+ return *str1 == *str2;
+}
+
static boolean str_match_no_case( const char **pcur, const char *str )
{
const char *cur = *pcur;
@@ -110,6 +126,20 @@ static boolean parse_uint( const char **pcur, uint *val )
return FALSE;
}
+static boolean parse_identifier( const char **pcur, char *ret )
+{
+ const char *cur = *pcur;
+ int i = 0;
+ if (is_alpha_underscore( cur )) {
+ ret[i++] = *cur++;
+ while (is_alpha_underscore( cur ))
+ ret[i++] = *cur++;
+ *pcur = cur;
+ return TRUE;
+ }
+ return FALSE;
+}
+
/* Parse floating point.
*/
static boolean parse_float( const char **pcur, float *val )
@@ -229,7 +259,8 @@ static const char *file_names[TGSI_FILE_COUNT] =
"ADDR",
"IMM",
"LOOP",
- "PRED"
+ "PRED",
+ "SV"
};
static boolean
@@ -939,6 +970,106 @@ static boolean parse_immediate( struct translate_ctx *ctx )
return TRUE;
}
+static const char *property_names[] =
+{
+ "GS_INPUT_PRIMITIVE",
+ "GS_OUTPUT_PRIMITIVE",
+ "GS_MAX_OUTPUT_VERTICES"
+};
+
+static const char *primitive_names[] =
+{
+ "POINTS",
+ "LINES",
+ "LINE_LOOP",
+ "LINE_STRIP",
+ "TRIANGLES",
+ "TRIANGLE_STRIP",
+ "TRIANGLE_FAN",
+ "QUADS",
+ "QUAD_STRIP",
+ "POLYGON"
+};
+
+static boolean
+parse_primitive( const char **pcur, uint *primitive )
+{
+ uint i;
+
+ for (i = 0; i < PIPE_PRIM_MAX; i++) {
+ const char *cur = *pcur;
+
+ if (str_match_no_case( &cur, primitive_names[i])) {
+ *primitive = i;
+ *pcur = cur;
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+
+static boolean parse_property( struct translate_ctx *ctx )
+{
+ struct tgsi_full_property prop;
+ uint property_name;
+ uint values[8];
+ uint advance;
+ char id[64];
+
+ if (!eat_white( &ctx->cur )) {
+ report_error( ctx, "Syntax error" );
+ return FALSE;
+ }
+ if (!parse_identifier( &ctx->cur, id )) {
+ report_error( ctx, "Syntax error" );
+ return FALSE;
+ }
+ for (property_name = 0; property_name < TGSI_PROPERTY_COUNT;
+ ++property_name) {
+ if (streq_nocase_uprcase(id, property_names[property_name])) {
+ break;
+ }
+ }
+ if (property_name >= TGSI_PROPERTY_COUNT) {
+ debug_printf( "\nError: Unknown property : '%s'", id );
+ return FALSE;
+ }
+
+ eat_opt_white( &ctx->cur );
+ switch(property_name) {
+ case TGSI_PROPERTY_GS_INPUT_PRIM:
+ case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+ if (!parse_primitive(&ctx->cur, &values[0] )) {
+ report_error( ctx, "Unknown primitive name as property!" );
+ return FALSE;
+ }
+ break;
+ default:
+ if (!parse_uint(&ctx->cur, &values[0] )) {
+ report_error( ctx, "Expected unsigned integer as property!" );
+ return FALSE;
+ }
+ }
+
+ prop = tgsi_default_full_property();
+ prop.Property.PropertyName = property_name;
+ prop.Property.NrTokens += 1;
+ prop.u[0].Data = values[0];
+
+ advance = tgsi_build_full_property(
+ &prop,
+ ctx->tokens_cur,
+ ctx->header,
+ (uint) (ctx->tokens_end - ctx->tokens_cur) );
+ if (advance == 0)
+ return FALSE;
+ ctx->tokens_cur += advance;
+
+ return TRUE;
+}
+
+
static boolean translate( struct translate_ctx *ctx )
{
eat_opt_white( &ctx->cur );
@@ -947,7 +1078,6 @@ static boolean translate( struct translate_ctx *ctx )
while (*ctx->cur != '\0') {
uint label_val = 0;
-
if (!eat_white( &ctx->cur )) {
report_error( ctx, "Syntax error" );
return FALSE;
@@ -955,7 +1085,6 @@ static boolean translate( struct translate_ctx *ctx )
if (*ctx->cur == '\0')
break;
-
if (parse_label( ctx, &label_val )) {
if (!parse_instruction( ctx, TRUE ))
return FALSE;
@@ -968,6 +1097,10 @@ static boolean translate( struct translate_ctx *ctx )
if (!parse_immediate( ctx ))
return FALSE;
}
+ else if (str_match_no_case( &ctx->cur, "PROPERTY" )) {
+ if (!parse_property( ctx ))
+ return FALSE;
+ }
else if (!parse_instruction( ctx, FALSE )) {
return FALSE;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c
index 8b8f489b355..ae875f29abf 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c
@@ -79,6 +79,19 @@ emit_immediate(struct tgsi_transform_context *ctx,
}
+static void
+emit_property(struct tgsi_transform_context *ctx,
+ const struct tgsi_full_property *prop)
+{
+ uint ti = ctx->ti;
+
+ ti += tgsi_build_full_property(prop,
+ ctx->tokens_out + ti,
+ ctx->header,
+ ctx->max_tokens_out - ti);
+ ctx->ti = ti;
+}
+
/**
* Apply user-defined transformations to the input shader to produce
@@ -110,6 +123,7 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in,
ctx->emit_instruction = emit_instruction;
ctx->emit_declaration = emit_declaration;
ctx->emit_immediate = emit_immediate;
+ ctx->emit_property = emit_property;
ctx->tokens_out = tokens_out;
ctx->max_tokens_out = max_tokens_out;
@@ -182,6 +196,17 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in,
ctx->emit_immediate(ctx, fullimm);
}
break;
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ {
+ struct tgsi_full_property *fullprop
+ = &parse.FullToken.FullProperty;
+
+ if (ctx->transform_property)
+ ctx->transform_property(ctx, fullprop);
+ else
+ ctx->emit_property(ctx, fullprop);
+ }
+ break;
default:
assert( 0 );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h
index a121adbaef4..818478e277a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h
@@ -53,6 +53,8 @@ struct tgsi_transform_context
void (*transform_immediate)(struct tgsi_transform_context *ctx,
struct tgsi_full_immediate *imm);
+ void (*transform_property)(struct tgsi_transform_context *ctx,
+ struct tgsi_full_property *prop);
/**
* Called at end of input program to allow caller to append extra
@@ -73,6 +75,8 @@ struct tgsi_transform_context
const struct tgsi_full_declaration *decl);
void (*emit_immediate)(struct tgsi_transform_context *ctx,
const struct tgsi_full_immediate *imm);
+ void (*emit_property)(struct tgsi_transform_context *ctx,
+ const struct tgsi_full_property *prop);
struct tgsi_header *header;
uint max_tokens_out;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 3f943845f5b..1e730e53427 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -90,6 +90,11 @@ struct ureg_program
unsigned vs_inputs[UREG_MAX_INPUT/32];
struct {
+ unsigned index;
+ } gs_input[UREG_MAX_INPUT];
+ unsigned nr_gs_inputs;
+
+ struct {
unsigned semantic_name;
unsigned semantic_index;
} output[UREG_MAX_OUTPUT];
@@ -278,6 +283,22 @@ ureg_DECL_vs_input( struct ureg_program *ureg,
}
+struct ureg_src
+ureg_DECL_gs_input(struct ureg_program *ureg,
+ unsigned index)
+{
+ if (ureg->nr_gs_inputs < UREG_MAX_INPUT) {
+ ureg->gs_input[ureg->nr_gs_inputs].index = index;
+ ureg->nr_gs_inputs++;
+ } else {
+ set_bad(ureg);
+ }
+
+ /* XXX: Add suport for true 2D input registers. */
+ return ureg_src_register(TGSI_FILE_INPUT, index);
+}
+
+
struct ureg_dst
ureg_DECL_output( struct ureg_program *ureg,
unsigned name,
@@ -964,8 +985,7 @@ static void emit_decls( struct ureg_program *ureg )
emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 );
}
}
- }
- else {
+ } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) {
for (i = 0; i < ureg->nr_fs_inputs; i++) {
emit_decl( ureg,
TGSI_FILE_INPUT,
@@ -974,6 +994,13 @@ static void emit_decls( struct ureg_program *ureg )
ureg->fs_input[i].semantic_index,
ureg->fs_input[i].interp );
}
+ } else {
+ for (i = 0; i < ureg->nr_gs_inputs; i++) {
+ emit_decl_range(ureg,
+ TGSI_FILE_INPUT,
+ ureg->gs_input[i].index,
+ 1);
+ }
}
for (i = 0; i < ureg->nr_outputs; i++) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index 94cc70a2082..7e3e7bcf1d3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -133,6 +133,10 @@ struct ureg_src
ureg_DECL_vs_input( struct ureg_program *,
unsigned index );
+struct ureg_src
+ureg_DECL_gs_input(struct ureg_program *,
+ unsigned index);
+
struct ureg_dst
ureg_DECL_output( struct ureg_program *,
unsigned semantic_name,
diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile
index 1d8bb55bbd6..3ed90fd1b70 100644
--- a/src/gallium/auxiliary/util/Makefile
+++ b/src/gallium/auxiliary/util/Makefile
@@ -9,8 +9,10 @@ C_SOURCES = \
u_debug_symbol.c \
u_debug_stack.c \
u_blit.c \
+ u_blitter.c \
u_cache.c \
u_cpu_detect.c \
+ u_dl.c \
u_draw_quad.c \
u_format.c \
u_format_access.c \
@@ -30,6 +32,7 @@ C_SOURCES = \
u_stream_stdc.c \
u_stream_wd.c \
u_surface.c \
+ u_texture.c \
u_tile.c \
u_time.c \
u_timed_winsys.c \
diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript
index 8d99106d0b8..2a546d19dc0 100644
--- a/src/gallium/auxiliary/util/SConscript
+++ b/src/gallium/auxiliary/util/SConscript
@@ -23,6 +23,7 @@ util = env.ConvenienceLibrary(
source = [
'u_bitmask.c',
'u_blit.c',
+ 'u_blitter.c',
'u_cache.c',
'u_cpu_detect.c',
'u_debug.c',
@@ -30,6 +31,7 @@ util = env.ConvenienceLibrary(
'u_debug_memory.c',
'u_debug_stack.c',
'u_debug_symbol.c',
+ 'u_dl.c',
'u_draw_quad.c',
'u_format.c',
'u_format_access.c',
@@ -48,6 +50,7 @@ util = env.ConvenienceLibrary(
'u_stream_stdc.c',
'u_stream_wd.c',
'u_surface.c',
+ 'u_texture.c',
'u_tile.c',
'u_time.c',
'u_timed_winsys.c',
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index abe1de3302b..c9050ca864c 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -126,7 +126,8 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
}
/* fragment shader */
- ctx->fs[TGSI_WRITEMASK_XYZW] = util_make_fragment_tex_shader(pipe);
+ ctx->fs[TGSI_WRITEMASK_XYZW] =
+ util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D);
ctx->vbuf = NULL;
/* init vertex data that doesn't change */
@@ -420,7 +421,9 @@ util_blit_pixels_writemask(struct blit_state *ctx,
cso_set_sampler_textures(ctx->cso, 1, &tex);
if (ctx->fs[writemask] == NULL)
- ctx->fs[writemask] = util_make_fragment_tex_shader_writemask(pipe, writemask);
+ ctx->fs[writemask] =
+ util_make_fragment_tex_shader_writemask(pipe, TGSI_TEXTURE_2D,
+ writemask);
/* shaders */
cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]);
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
new file mode 100644
index 00000000000..895af2c8d00
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -0,0 +1,718 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Marek Olšák <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Blitter utility to facilitate acceleration of the clear, surface_copy,
+ * and surface_fill functions.
+ *
+ * @author Marek Olšák
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_blitter.h"
+#include "util/u_draw_quad.h"
+#include "util/u_pack_color.h"
+#include "util/u_rect.h"
+#include "util/u_simple_shaders.h"
+#include "util/u_texture.h"
+
+struct blitter_context_priv
+{
+ struct blitter_context blitter;
+
+ struct pipe_context *pipe; /**< pipe context */
+ struct pipe_buffer *vbuf; /**< quad */
+
+ float vertices[4][2][4]; /**< {pos, color} or {pos, texcoord} */
+
+ /* Templates for various state objects. */
+ struct pipe_depth_stencil_alpha_state template_dsa;
+ struct pipe_sampler_state template_sampler_state;
+
+ /* Constant state objects. */
+ /* Vertex shaders. */
+ void *vs_col; /**< Vertex shader which passes {pos, color} to the output */
+ void *vs_tex; /**<Vertex shader which passes {pos, texcoord} to the output.*/
+
+ /* Fragment shaders. */
+ /* FS which outputs a color to multiple color buffers. */
+ void *fs_col[PIPE_MAX_COLOR_BUFS];
+
+ /* FS which outputs a color from a texture,
+ where the index is PIPE_TEXTURE_* to be sampled. */
+ void *fs_texfetch_col[PIPE_MAX_TEXTURE_TYPES];
+
+ /* FS which outputs a depth from a texture,
+ where the index is PIPE_TEXTURE_* to be sampled. */
+ void *fs_texfetch_depth[PIPE_MAX_TEXTURE_TYPES];
+
+ /* Blend state. */
+ void *blend_write_color; /**< blend state with writemask of RGBA */
+ void *blend_keep_color; /**< blend state with writemask of 0 */
+
+ /* Depth stencil alpha state. */
+ void *dsa_write_depth_stencil[0xff]; /**< indices are stencil clear values */
+ void *dsa_write_depth_keep_stencil;
+ void *dsa_keep_depth_stencil;
+
+ /* Sampler state for clamping to a miplevel. */
+ void *sampler_state[PIPE_MAX_TEXTURE_LEVELS];
+
+ /* Rasterizer state. */
+ void *rs_state;
+};
+
+struct blitter_context *util_blitter_create(struct pipe_context *pipe)
+{
+ struct blitter_context_priv *ctx;
+ struct pipe_blend_state blend;
+ struct pipe_depth_stencil_alpha_state *dsa;
+ struct pipe_rasterizer_state rs_state;
+ struct pipe_sampler_state *sampler_state;
+ unsigned i;
+
+ ctx = CALLOC_STRUCT(blitter_context_priv);
+ if (!ctx)
+ return NULL;
+
+ ctx->pipe = pipe;
+
+ /* init state objects for them to be considered invalid */
+ ctx->blitter.saved_fb_state.nr_cbufs = ~0;
+ ctx->blitter.saved_num_textures = ~0;
+ ctx->blitter.saved_num_sampler_states = ~0;
+
+ /* blend state objects */
+ memset(&blend, 0, sizeof(blend));
+ ctx->blend_keep_color = pipe->create_blend_state(pipe, &blend);
+
+ blend.colormask = PIPE_MASK_RGBA;
+ ctx->blend_write_color = pipe->create_blend_state(pipe, &blend);
+
+ /* depth stencil alpha state objects */
+ dsa = &ctx->template_dsa;
+ ctx->dsa_keep_depth_stencil =
+ pipe->create_depth_stencil_alpha_state(pipe, dsa);
+
+ dsa->depth.enabled = 1;
+ dsa->depth.writemask = 1;
+ dsa->depth.func = PIPE_FUNC_ALWAYS;
+ ctx->dsa_write_depth_keep_stencil =
+ pipe->create_depth_stencil_alpha_state(pipe, dsa);
+
+ dsa->stencil[0].enabled = 1;
+ dsa->stencil[0].func = PIPE_FUNC_ALWAYS;
+ dsa->stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE;
+ dsa->stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
+ dsa->stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE;
+ dsa->stencil[0].valuemask = 0xff;
+ dsa->stencil[0].writemask = 0xff;
+ /* The DSA state objects which write depth and stencil are created
+ * on-demand. */
+
+ /* sampler state */
+ sampler_state = &ctx->template_sampler_state;
+ sampler_state->wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler_state->wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler_state->wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ /* The sampler state objects which sample from a specified mipmap level
+ * are created on-demand. */
+
+ /* rasterizer state */
+ memset(&rs_state, 0, sizeof(rs_state));
+ rs_state.front_winding = PIPE_WINDING_CW;
+ rs_state.cull_mode = PIPE_WINDING_NONE;
+ rs_state.bypass_vs_clip_and_viewport = 1;
+ rs_state.gl_rasterization_rules = 1;
+ ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state);
+
+ /* fragment shaders are created on-demand */
+
+ /* vertex shaders */
+ {
+ const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+ TGSI_SEMANTIC_COLOR };
+ const uint semantic_indices[] = { 0, 0 };
+ ctx->vs_col =
+ util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
+ semantic_indices);
+ }
+ {
+ const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+ TGSI_SEMANTIC_GENERIC };
+ const uint semantic_indices[] = { 0, 0 };
+ ctx->vs_tex =
+ util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
+ semantic_indices);
+ }
+
+ /* set invariant vertex coordinates */
+ for (i = 0; i < 4; i++)
+ ctx->vertices[i][0][3] = 1; /*v.w*/
+
+ /* create the vertex buffer */
+ ctx->vbuf = pipe_buffer_create(ctx->pipe->screen,
+ 32,
+ PIPE_BUFFER_USAGE_VERTEX,
+ sizeof(ctx->vertices));
+
+ return &ctx->blitter;
+}
+
+void util_blitter_destroy(struct blitter_context *blitter)
+{
+ struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+ struct pipe_context *pipe = ctx->pipe;
+ int i;
+
+ pipe->delete_blend_state(pipe, ctx->blend_write_color);
+ pipe->delete_blend_state(pipe, ctx->blend_keep_color);
+ pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
+ pipe->delete_depth_stencil_alpha_state(pipe,
+ ctx->dsa_write_depth_keep_stencil);
+
+ for (i = 0; i < 0xff; i++)
+ if (ctx->dsa_write_depth_stencil[i])
+ pipe->delete_depth_stencil_alpha_state(pipe,
+ ctx->dsa_write_depth_stencil[i]);
+
+ pipe->delete_rasterizer_state(pipe, ctx->rs_state);
+ pipe->delete_vs_state(pipe, ctx->vs_col);
+ pipe->delete_vs_state(pipe, ctx->vs_tex);
+
+ for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) {
+ if (ctx->fs_texfetch_col[i])
+ pipe->delete_fs_state(pipe, ctx->fs_texfetch_col[i]);
+ if (ctx->fs_texfetch_depth[i])
+ pipe->delete_fs_state(pipe, ctx->fs_texfetch_depth[i]);
+ }
+
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS && ctx->fs_col[i]; i++)
+ if (ctx->fs_col[i])
+ pipe->delete_fs_state(pipe, ctx->fs_col[i]);
+
+ for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++)
+ if (ctx->sampler_state[i])
+ pipe->delete_sampler_state(pipe, ctx->sampler_state[i]);
+
+ pipe_buffer_reference(&ctx->vbuf, NULL);
+ FREE(ctx);
+}
+
+static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx)
+{
+ /* make sure these CSOs have been saved */
+ assert(ctx->blitter.saved_blend_state &&
+ ctx->blitter.saved_dsa_state &&
+ ctx->blitter.saved_rs_state &&
+ ctx->blitter.saved_fs &&
+ ctx->blitter.saved_vs);
+}
+
+static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
+{
+ struct pipe_context *pipe = ctx->pipe;
+
+ /* restore the state objects which are always required to be saved */
+ pipe->bind_blend_state(pipe, ctx->blitter.saved_blend_state);
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->blitter.saved_dsa_state);
+ pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state);
+ pipe->bind_fs_state(pipe, ctx->blitter.saved_fs);
+ pipe->bind_vs_state(pipe, ctx->blitter.saved_vs);
+
+ ctx->blitter.saved_blend_state = 0;
+ ctx->blitter.saved_dsa_state = 0;
+ ctx->blitter.saved_rs_state = 0;
+ ctx->blitter.saved_fs = 0;
+ ctx->blitter.saved_vs = 0;
+
+ /* restore the state objects which are required to be saved before copy/fill
+ */
+ if (ctx->blitter.saved_fb_state.nr_cbufs != ~0) {
+ pipe->set_framebuffer_state(pipe, &ctx->blitter.saved_fb_state);
+ ctx->blitter.saved_fb_state.nr_cbufs = ~0;
+ }
+
+ if (ctx->blitter.saved_num_sampler_states != ~0) {
+ pipe->bind_fragment_sampler_states(pipe,
+ ctx->blitter.saved_num_sampler_states,
+ ctx->blitter.saved_sampler_states);
+ ctx->blitter.saved_num_sampler_states = ~0;
+ }
+
+ if (ctx->blitter.saved_num_textures != ~0) {
+ pipe->set_fragment_sampler_textures(pipe,
+ ctx->blitter.saved_num_textures,
+ ctx->blitter.saved_textures);
+ ctx->blitter.saved_num_textures = ~0;
+ }
+}
+
+static void blitter_set_rectangle(struct blitter_context_priv *ctx,
+ unsigned x1, unsigned y1,
+ unsigned x2, unsigned y2,
+ float depth)
+{
+ int i;
+
+ /* set vertex positions */
+ ctx->vertices[0][0][0] = x1; /*v0.x*/
+ ctx->vertices[0][0][1] = y1; /*v0.y*/
+
+ ctx->vertices[1][0][0] = x2; /*v1.x*/
+ ctx->vertices[1][0][1] = y1; /*v1.y*/
+
+ ctx->vertices[2][0][0] = x2; /*v2.x*/
+ ctx->vertices[2][0][1] = y2; /*v2.y*/
+
+ ctx->vertices[3][0][0] = x1; /*v3.x*/
+ ctx->vertices[3][0][1] = y2; /*v3.y*/
+
+ for (i = 0; i < 4; i++)
+ ctx->vertices[i][0][2] = depth; /*z*/
+}
+
+static void blitter_set_clear_color(struct blitter_context_priv *ctx,
+ const float *rgba)
+{
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ ctx->vertices[i][1][0] = rgba[0];
+ ctx->vertices[i][1][1] = rgba[1];
+ ctx->vertices[i][1][2] = rgba[2];
+ ctx->vertices[i][1][3] = rgba[3];
+ }
+}
+
+static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx,
+ struct pipe_surface *surf,
+ unsigned x1, unsigned y1,
+ unsigned x2, unsigned y2)
+{
+ int i;
+ float s1 = x1 / (float)surf->width;
+ float t1 = y1 / (float)surf->height;
+ float s2 = x2 / (float)surf->width;
+ float t2 = y2 / (float)surf->height;
+
+ ctx->vertices[0][1][0] = s1; /*t0.s*/
+ ctx->vertices[0][1][1] = t1; /*t0.t*/
+
+ ctx->vertices[1][1][0] = s2; /*t1.s*/
+ ctx->vertices[1][1][1] = t1; /*t1.t*/
+
+ ctx->vertices[2][1][0] = s2; /*t2.s*/
+ ctx->vertices[2][1][1] = t2; /*t2.t*/
+
+ ctx->vertices[3][1][0] = s1; /*t3.s*/
+ ctx->vertices[3][1][1] = t2; /*t3.t*/
+
+ for (i = 0; i < 4; i++) {
+ ctx->vertices[i][1][2] = 0; /*r*/
+ ctx->vertices[i][1][3] = 1; /*q*/
+ }
+}
+
+static void blitter_set_texcoords_3d(struct blitter_context_priv *ctx,
+ struct pipe_surface *surf,
+ unsigned x1, unsigned y1,
+ unsigned x2, unsigned y2)
+{
+ int i;
+ float depth = u_minify(surf->texture->depth0, surf->level);
+ float r = surf->zslice / depth;
+
+ blitter_set_texcoords_2d(ctx, surf, x1, y1, x2, y2);
+
+ for (i = 0; i < 4; i++)
+ ctx->vertices[i][1][2] = r; /*r*/
+}
+
+static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx,
+ struct pipe_surface *surf,
+ unsigned x1, unsigned y1,
+ unsigned x2, unsigned y2)
+{
+ int i;
+ float s1 = x1 / (float)surf->width;
+ float t1 = y1 / (float)surf->height;
+ float s2 = x2 / (float)surf->width;
+ float t2 = y2 / (float)surf->height;
+ const float st[4][2] = {
+ {s1, t1}, {s2, t1}, {s2, t2}, {s1, t2}
+ };
+
+ util_map_texcoords2d_onto_cubemap(surf->face,
+ /* pointer, stride in floats */
+ &st[0][0], 2,
+ &ctx->vertices[0][1][0], 8);
+
+ for (i = 0; i < 4; i++)
+ ctx->vertices[i][1][3] = 1; /*q*/
+}
+
+static void blitter_draw_quad(struct blitter_context_priv *ctx)
+{
+ struct pipe_context *pipe = ctx->pipe;
+
+ /* write vertices and draw them */
+ pipe_buffer_write(pipe->screen, ctx->vbuf,
+ 0, sizeof(ctx->vertices), ctx->vertices);
+
+ util_draw_vertex_buffer(pipe, ctx->vbuf, 0, PIPE_PRIM_TRIANGLE_FAN,
+ 4, /* verts */
+ 2); /* attribs/vert */
+}
+
+static INLINE
+void *blitter_get_state_write_depth_stencil(
+ struct blitter_context_priv *ctx,
+ unsigned stencil)
+{
+ struct pipe_context *pipe = ctx->pipe;
+
+ stencil &= 0xff;
+
+ /* Create the DSA state on-demand. */
+ if (!ctx->dsa_write_depth_stencil[stencil]) {
+ ctx->template_dsa.stencil[0].ref_value = stencil;
+
+ ctx->dsa_write_depth_stencil[stencil] =
+ pipe->create_depth_stencil_alpha_state(pipe, &ctx->template_dsa);
+ }
+
+ return ctx->dsa_write_depth_stencil[stencil];
+}
+
+static INLINE
+void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
+ int miplevel)
+{
+ struct pipe_context *pipe = ctx->pipe;
+ struct pipe_sampler_state *sampler_state = &ctx->template_sampler_state;
+
+ assert(miplevel < PIPE_MAX_TEXTURE_LEVELS);
+
+ /* Create the sampler state on-demand. */
+ if (!ctx->sampler_state[miplevel]) {
+ sampler_state->lod_bias = miplevel;
+ sampler_state->min_lod = miplevel;
+ sampler_state->max_lod = miplevel;
+
+ ctx->sampler_state[miplevel] = pipe->create_sampler_state(pipe,
+ sampler_state);
+ }
+
+ /* Return void** so that it can be passed to bind_fragment_sampler_states
+ * directly. */
+ return &ctx->sampler_state[miplevel];
+}
+
+static INLINE
+void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs)
+{
+ struct pipe_context *pipe = ctx->pipe;
+ unsigned index = num_cbufs ? num_cbufs - 1 : 0;
+
+ assert(num_cbufs <= PIPE_MAX_COLOR_BUFS);
+
+ if (!ctx->fs_col[index])
+ ctx->fs_col[index] =
+ util_make_fragment_clonecolor_shader(pipe, num_cbufs);
+
+ return ctx->fs_col[index];
+}
+
+static INLINE
+void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx,
+ unsigned tex_target)
+{
+ struct pipe_context *pipe = ctx->pipe;
+
+ assert(tex_target < PIPE_MAX_TEXTURE_TYPES);
+
+ /* Create the fragment shader on-demand. */
+ if (!ctx->fs_texfetch_col[tex_target]) {
+ switch (tex_target) {
+ case PIPE_TEXTURE_1D:
+ ctx->fs_texfetch_col[PIPE_TEXTURE_1D] =
+ util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_1D);
+ break;
+ case PIPE_TEXTURE_2D:
+ ctx->fs_texfetch_col[PIPE_TEXTURE_2D] =
+ util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D);
+ break;
+ case PIPE_TEXTURE_3D:
+ ctx->fs_texfetch_col[PIPE_TEXTURE_3D] =
+ util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_3D);
+ break;
+ case PIPE_TEXTURE_CUBE:
+ ctx->fs_texfetch_col[PIPE_TEXTURE_CUBE] =
+ util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE);
+ break;
+ default:;
+ }
+ }
+
+ return ctx->fs_texfetch_col[tex_target];
+}
+
+static INLINE
+void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
+ unsigned tex_target)
+{
+ struct pipe_context *pipe = ctx->pipe;
+
+ assert(tex_target < PIPE_MAX_TEXTURE_TYPES);
+
+ /* Create the fragment shader on-demand. */
+ if (!ctx->fs_texfetch_depth[tex_target]) {
+ switch (tex_target) {
+ case PIPE_TEXTURE_1D:
+ ctx->fs_texfetch_depth[PIPE_TEXTURE_1D] =
+ util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_1D);
+ break;
+ case PIPE_TEXTURE_2D:
+ ctx->fs_texfetch_depth[PIPE_TEXTURE_2D] =
+ util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_2D);
+ break;
+ case PIPE_TEXTURE_3D:
+ ctx->fs_texfetch_depth[PIPE_TEXTURE_3D] =
+ util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_3D);
+ break;
+ case PIPE_TEXTURE_CUBE:
+ ctx->fs_texfetch_depth[PIPE_TEXTURE_CUBE] =
+ util_make_fragment_tex_shader_writedepth(pipe,TGSI_TEXTURE_CUBE);
+ break;
+ default:;
+ }
+ }
+
+ return ctx->fs_texfetch_depth[tex_target];
+}
+
+void util_blitter_clear(struct blitter_context *blitter,
+ unsigned width, unsigned height,
+ unsigned num_cbufs,
+ unsigned clear_buffers,
+ const float *rgba,
+ double depth, unsigned stencil)
+{
+ struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+ struct pipe_context *pipe = ctx->pipe;
+
+ assert(num_cbufs <= PIPE_MAX_COLOR_BUFS);
+
+ blitter_check_saved_CSOs(ctx);
+
+ /* bind CSOs */
+ if (clear_buffers & PIPE_CLEAR_COLOR)
+ pipe->bind_blend_state(pipe, ctx->blend_write_color);
+ else
+ pipe->bind_blend_state(pipe, ctx->blend_keep_color);
+
+ if (clear_buffers & PIPE_CLEAR_DEPTHSTENCIL)
+ pipe->bind_depth_stencil_alpha_state(pipe,
+ blitter_get_state_write_depth_stencil(ctx, stencil));
+ else
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
+
+ pipe->bind_rasterizer_state(pipe, ctx->rs_state);
+ pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs));
+ pipe->bind_vs_state(pipe, ctx->vs_col);
+
+ blitter_set_clear_color(ctx, rgba);
+ blitter_set_rectangle(ctx, 0, 0, width, height, depth);
+ blitter_draw_quad(ctx);
+ blitter_restore_CSOs(ctx);
+}
+
+void util_blitter_copy(struct blitter_context *blitter,
+ struct pipe_surface *dst,
+ unsigned dstx, unsigned dsty,
+ struct pipe_surface *src,
+ unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height,
+ boolean ignore_stencil)
+{
+ struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+ struct pipe_context *pipe = ctx->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_framebuffer_state fb_state;
+ boolean is_stencil, is_depth;
+ unsigned dst_tex_usage;
+
+ /* give up if textures are not set */
+ assert(dst->texture && src->texture);
+ if (!dst->texture || !src->texture)
+ return;
+
+ is_depth = pf_get_component_bits(src->format, PIPE_FORMAT_COMP_Z) != 0;
+ is_stencil = pf_get_component_bits(src->format, PIPE_FORMAT_COMP_S) != 0;
+ dst_tex_usage = is_depth || is_stencil ? PIPE_TEXTURE_USAGE_DEPTH_STENCIL :
+ PIPE_TEXTURE_USAGE_RENDER_TARGET;
+
+ /* check if we can sample from and render to the surfaces */
+ /* (assuming copying a stencil buffer is not possible) */
+ if ((!ignore_stencil && is_stencil) ||
+ !screen->is_format_supported(screen, dst->format, dst->texture->target,
+ dst_tex_usage, 0) ||
+ !screen->is_format_supported(screen, src->format, src->texture->target,
+ PIPE_TEXTURE_USAGE_SAMPLER, 0)) {
+ util_surface_copy(pipe, FALSE, dst, dstx, dsty, src, srcx, srcy,
+ width, height);
+ return;
+ }
+
+ /* check whether the states are properly saved */
+ blitter_check_saved_CSOs(ctx);
+ assert(blitter->saved_fb_state.nr_cbufs != ~0);
+ assert(blitter->saved_num_textures != ~0);
+ assert(blitter->saved_num_sampler_states != ~0);
+ assert(src->texture->target < PIPE_MAX_TEXTURE_TYPES);
+
+ /* bind CSOs */
+ fb_state.width = dst->width;
+ fb_state.height = dst->height;
+
+ if (is_depth) {
+ pipe->bind_blend_state(pipe, ctx->blend_keep_color);
+ pipe->bind_depth_stencil_alpha_state(pipe,
+ ctx->dsa_write_depth_keep_stencil);
+ pipe->bind_fs_state(pipe,
+ blitter_get_fs_texfetch_depth(ctx, src->texture->target));
+
+ fb_state.nr_cbufs = 0;
+ fb_state.zsbuf = dst;
+ } else {
+ pipe->bind_blend_state(pipe, ctx->blend_write_color);
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
+ pipe->bind_fs_state(pipe,
+ blitter_get_fs_texfetch_col(ctx, src->texture->target));
+
+ fb_state.nr_cbufs = 1;
+ fb_state.cbufs[0] = dst;
+ fb_state.zsbuf = 0;
+ }
+
+ pipe->bind_rasterizer_state(pipe, ctx->rs_state);
+ pipe->bind_vs_state(pipe, ctx->vs_tex);
+ pipe->bind_fragment_sampler_states(pipe, 1,
+ blitter_get_sampler_state(ctx, src->level));
+ pipe->set_fragment_sampler_textures(pipe, 1, &src->texture);
+ pipe->set_framebuffer_state(pipe, &fb_state);
+
+ /* set texture coordinates */
+ switch (src->texture->target) {
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_2D:
+ blitter_set_texcoords_2d(ctx, src, srcx, srcy,
+ srcx+width, srcy+height);
+ break;
+ case PIPE_TEXTURE_3D:
+ blitter_set_texcoords_3d(ctx, src, srcx, srcy,
+ srcx+width, srcy+height);
+ break;
+ case PIPE_TEXTURE_CUBE:
+ blitter_set_texcoords_cube(ctx, src, srcx, srcy,
+ srcx+width, srcy+height);
+ break;
+ default:
+ assert(0);
+ }
+
+ blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0);
+ blitter_draw_quad(ctx);
+ blitter_restore_CSOs(ctx);
+}
+
+void util_blitter_fill(struct blitter_context *blitter,
+ struct pipe_surface *dst,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height,
+ unsigned value)
+{
+ struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+ struct pipe_context *pipe = ctx->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_framebuffer_state fb_state;
+ float rgba[4];
+ ubyte ub_rgba[4] = {0};
+ union util_color color;
+ int i;
+
+ assert(dst->texture);
+ if (!dst->texture)
+ return;
+
+ /* check if we can render to the surface */
+ if (pf_is_depth_or_stencil(dst->format) || /* unlikely, but you never know */
+ !screen->is_format_supported(screen, dst->format, dst->texture->target,
+ PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
+ util_surface_fill(pipe, dst, dstx, dsty, width, height, value);
+ return;
+ }
+
+ /* unpack the color */
+ color.ui = value;
+ util_unpack_color_ub(dst->format, &color,
+ ub_rgba, ub_rgba+1, ub_rgba+2, ub_rgba+3);
+ for (i = 0; i < 4; i++)
+ rgba[i] = ubyte_to_float(ub_rgba[i]);
+
+ /* check the saved state */
+ blitter_check_saved_CSOs(ctx);
+ assert(blitter->saved_fb_state.nr_cbufs != ~0);
+
+ /* bind CSOs */
+ pipe->bind_blend_state(pipe, ctx->blend_write_color);
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
+ pipe->bind_rasterizer_state(pipe, ctx->rs_state);
+ pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1));
+ pipe->bind_vs_state(pipe, ctx->vs_col);
+
+ /* set a framebuffer state */
+ fb_state.width = dst->width;
+ fb_state.height = dst->height;
+ fb_state.nr_cbufs = 1;
+ fb_state.cbufs[0] = dst;
+ fb_state.zsbuf = 0;
+ pipe->set_framebuffer_state(pipe, &fb_state);
+
+ blitter_set_clear_color(ctx, rgba);
+ blitter_set_rectangle(ctx, 0, 0, width, height, 0);
+ blitter_draw_quad(ctx);
+ blitter_restore_CSOs(ctx);
+}
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
new file mode 100644
index 00000000000..3da5a6ca525
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -0,0 +1,230 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Marek Olšák <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_BLITTER_H
+#define U_BLITTER_H
+
+#include "util/u_memory.h"
+
+#include "pipe/p_state.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct pipe_context;
+
+struct blitter_context
+{
+ /* Private members, really. */
+ void *saved_blend_state; /**< blend state */
+ void *saved_dsa_state; /**< depth stencil alpha state */
+ void *saved_rs_state; /**< rasterizer state */
+ void *saved_fs, *saved_vs; /**< fragment shader, vertex shader */
+
+ struct pipe_framebuffer_state saved_fb_state; /**< framebuffer state */
+
+ int saved_num_sampler_states;
+ void *saved_sampler_states[32];
+
+ int saved_num_textures;
+ struct pipe_texture *saved_textures[32]; /* is 32 enough? */
+};
+
+/**
+ * Create a blitter context.
+ */
+struct blitter_context *util_blitter_create(struct pipe_context *pipe);
+
+/**
+ * Destroy a blitter context.
+ */
+void util_blitter_destroy(struct blitter_context *blitter);
+
+/*
+ * These CSOs must be saved before any of the following functions is called:
+ * - blend state
+ * - depth stencil alpha state
+ * - rasterizer state
+ * - vertex shader
+ * - fragment shader
+ */
+
+/**
+ * Clear a specified set of currently bound buffers to specified values.
+ */
+void util_blitter_clear(struct blitter_context *blitter,
+ unsigned width, unsigned height,
+ unsigned num_cbufs,
+ unsigned clear_buffers,
+ const float *rgba,
+ double depth, unsigned stencil);
+
+/**
+ * Copy a block of pixels from one surface to another.
+ *
+ * You can copy from any color format to any other color format provided
+ * the former can be sampled and the latter can be rendered to. Otherwise,
+ * a software fallback path is taken and both surfaces must be of the same
+ * format.
+ *
+ * The same holds for depth-stencil formats with the exception that stencil
+ * cannot be copied unless you set ignore_stencil to FALSE. In that case,
+ * a software fallback path is taken and both surfaces must be of the same
+ * format.
+ *
+ * Use pipe_screen->is_format_supported to know your options.
+ *
+ * These states must be saved in the blitter in addition to the state objects
+ * already required to be saved:
+ * - framebuffer state
+ * - fragment sampler states
+ * - fragment sampler textures
+ */
+void util_blitter_copy(struct blitter_context *blitter,
+ struct pipe_surface *dst,
+ unsigned dstx, unsigned dsty,
+ struct pipe_surface *src,
+ unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height,
+ boolean ignore_stencil);
+
+/**
+ * Fill a region of a surface with a constant value.
+ *
+ * If the surface cannot be rendered to or it's a depth-stencil format,
+ * a software fallback path is taken.
+ *
+ * These states must be saved in the blitter in addition to the state objects
+ * already required to be saved:
+ * - framebuffer state
+ */
+void util_blitter_fill(struct blitter_context *blitter,
+ struct pipe_surface *dst,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height,
+ unsigned value);
+
+/**
+ * Copy all pixels from one surface to another.
+ *
+ * The rules are the same as in util_blitter_copy with the addition that
+ * surfaces must have the same size.
+ */
+static INLINE
+void util_blitter_copy_surface(struct blitter_context *blitter,
+ struct pipe_surface *dst,
+ struct pipe_surface *src,
+ boolean ignore_stencil)
+{
+ assert(dst->width == src->width && dst->height == src->height);
+
+ util_blitter_copy(blitter, dst, 0, 0, src, 0, 0, src->width, src->height,
+ ignore_stencil);
+}
+
+
+/* The functions below should be used to save currently bound constant state
+ * objects inside a driver. The objects are automatically restored at the end
+ * of the util_blitter_{clear, fill, copy, copy_surface} functions and then
+ * forgotten.
+ *
+ * CSOs not listed here are not affected by util_blitter. */
+
+static INLINE
+void util_blitter_save_blend(struct blitter_context *blitter,
+ void *state)
+{
+ blitter->saved_blend_state = state;
+}
+
+static INLINE
+void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
+ void *state)
+{
+ blitter->saved_dsa_state = state;
+}
+
+static INLINE
+void util_blitter_save_rasterizer(struct blitter_context *blitter,
+ void *state)
+{
+ blitter->saved_rs_state = state;
+}
+
+static INLINE
+void util_blitter_save_fragment_shader(struct blitter_context *blitter,
+ void *fs)
+{
+ blitter->saved_fs = fs;
+}
+
+static INLINE
+void util_blitter_save_vertex_shader(struct blitter_context *blitter,
+ void *vs)
+{
+ blitter->saved_vs = vs;
+}
+
+static INLINE
+void util_blitter_save_framebuffer(struct blitter_context *blitter,
+ struct pipe_framebuffer_state *state)
+{
+ blitter->saved_fb_state = *state;
+}
+
+static INLINE
+void util_blitter_save_fragment_sampler_states(
+ struct blitter_context *blitter,
+ int num_sampler_states,
+ void **sampler_states)
+{
+ assert(num_sampler_states <= Elements(blitter->saved_sampler_states));
+
+ blitter->saved_num_sampler_states = num_sampler_states;
+ memcpy(blitter->saved_sampler_states, sampler_states,
+ num_sampler_states * sizeof(void *));
+}
+
+static INLINE
+void util_blitter_save_fragment_sampler_textures(
+ struct blitter_context *blitter,
+ int num_textures,
+ struct pipe_texture **textures)
+{
+ assert(num_textures <= Elements(blitter->saved_textures));
+
+ blitter->saved_num_textures = num_textures;
+ memcpy(blitter->saved_textures, textures,
+ num_textures * sizeof(struct pipe_texture *));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c
new file mode 100644
index 00000000000..b42b429d4d7
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_dl.c
@@ -0,0 +1,79 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * Copyright 1999-2008 Brian Paul
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_config.h"
+
+#if defined(PIPE_OS_UNIX)
+#include <dlfcn.h>
+#endif
+#if defined(PIPE_OS_WINDOWS)
+#include <windows.h>
+#endif
+
+#include "u_dl.h"
+
+
+struct util_dl_library *
+util_dl_open(const char *filename)
+{
+#if defined(PIPE_OS_UNIX)
+ return (struct util_dl_library *)dlopen(filename, RTLD_LAZY | RTLD_GLOBAL);
+#elif defined(PIPE_OS_WINDOWS)
+ return (struct util_dl_library *)LoadLibraryA(filename);
+#else
+ return NULL;
+#endif
+}
+
+
+util_dl_proc
+util_dl_get_proc_address(struct util_dl_library *library,
+ const char *procname)
+{
+#if defined(PIPE_OS_UNIX)
+ return (util_dl_proc)dlsym((void *)library, procname);
+#elif defined(PIPE_OS_WINDOWS)
+ return (util_dl_proc)GetProcAddress((HMODULE)library, procname);
+#else
+ return (util_dl_proc)NULL;
+#endif
+}
+
+
+void
+util_dl_close(struct util_dl_library *library)
+{
+#if defined(PIPE_OS_UNIX)
+ dlclose((void *)library);
+#elif defined(PIPE_OS_WINDOWS)
+ FreeLibrary((HMODULE)library);
+#else
+ (void)library;
+#endif
+}
diff --git a/src/gallium/auxiliary/util/u_dl.h b/src/gallium/auxiliary/util/u_dl.h
new file mode 100644
index 00000000000..018b38543b0
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_dl.h
@@ -0,0 +1,61 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef U_DL_H_
+#define U_DL_H_
+
+
+struct util_dl_library;
+
+
+typedef void (*util_dl_proc)(void);
+
+
+/**
+ * Open a library dynamically.
+ */
+struct util_dl_library *
+util_dl_open(const char *filename);
+
+
+/**
+ * Lookup a function in a library.
+ */
+util_dl_proc
+util_dl_get_proc_address(struct util_dl_library *library,
+ const char *procname);
+
+
+/**
+ * Close a library.
+ */
+void
+util_dl_close(struct util_dl_library *library);
+
+
+#endif /* U_DL_H_ */
diff --git a/src/gallium/auxiliary/util/u_format_access.py b/src/gallium/auxiliary/util/u_format_access.py
index eeb1a9657fd..0b05ddb9312 100644
--- a/src/gallium/auxiliary/util/u_format_access.py
+++ b/src/gallium/auxiliary/util/u_format_access.py
@@ -325,14 +325,14 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix):
elif swizzle == SWIZZLE_0:
value = '0'
elif swizzle == SWIZZLE_1:
- value = '1'
+ value = get_one(dst_type)
else:
assert False
elif format.colorspace == 'zs':
if i < 3:
value = 'z'
else:
- value = '1'
+ value = get_one(dst_type)
else:
assert False
print ' *dst_pixel++ = %s; /* %s */' % (value, 'rgba'[i])
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 83263d9fe64..69ff3b9dd9e 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -46,6 +46,7 @@
#include "util/u_gen_mipmap.h"
#include "util/u_simple_shaders.h"
#include "util/u_math.h"
+#include "util/u_texture.h"
#include "cso_cache/cso_context.h"
@@ -1317,7 +1318,7 @@ util_create_gen_mipmap(struct pipe_context *pipe,
}
/* fragment shader */
- ctx->fs = util_make_fragment_tex_shader(pipe);
+ ctx->fs = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D);
/* vertex data that doesn't change */
for (i = 0; i < 4; i++) {
@@ -1383,59 +1384,9 @@ set_vertex_data(struct gen_mipmap_state *ctx,
static const float st[4][2] = {
{0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
};
- float rx, ry, rz;
- uint i;
-
- /* loop over quad verts */
- for (i = 0; i < 4; i++) {
- /* Compute sc = +/-scale and tc = +/-scale.
- * Not +/-1 to avoid cube face selection ambiguity near the edges,
- * though that can still sometimes happen with this scale factor...
- */
- const float scale = 0.9999f;
- const float sc = (2.0f * st[i][0] - 1.0f) * scale;
- const float tc = (2.0f * st[i][1] - 1.0f) * scale;
-
- switch (face) {
- case PIPE_TEX_FACE_POS_X:
- rx = 1.0f;
- ry = -tc;
- rz = -sc;
- break;
- case PIPE_TEX_FACE_NEG_X:
- rx = -1.0f;
- ry = -tc;
- rz = sc;
- break;
- case PIPE_TEX_FACE_POS_Y:
- rx = sc;
- ry = 1.0f;
- rz = tc;
- break;
- case PIPE_TEX_FACE_NEG_Y:
- rx = sc;
- ry = -1.0f;
- rz = -tc;
- break;
- case PIPE_TEX_FACE_POS_Z:
- rx = sc;
- ry = -tc;
- rz = 1.0f;
- break;
- case PIPE_TEX_FACE_NEG_Z:
- rx = -sc;
- ry = -tc;
- rz = -1.0f;
- break;
- default:
- rx = ry = rz = 0.0f;
- assert(0);
- }
- ctx->vertices[i][1][0] = rx; /*s*/
- ctx->vertices[i][1][1] = ry; /*t*/
- ctx->vertices[i][1][2] = rz; /*r*/
- }
+ util_map_texcoords2d_onto_cubemap(face, &st[0][0], 2,
+ &ctx->vertices[0][1][0], 8);
}
else {
/* 1D/2D */
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index 1c8b157d91f..8172ead0201 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -2,6 +2,7 @@
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
+ * Copyright 2009 Marek Olšák <[email protected]>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -30,6 +31,7 @@
* Simple vertex/fragment shader generators.
*
* @author Brian Paul
+ Marek Olšák
*/
@@ -87,6 +89,7 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe,
*/
void *
util_make_fragment_tex_shader_writemask(struct pipe_context *pipe,
+ unsigned tex_target,
unsigned writemask )
{
struct ureg_program *ureg;
@@ -116,20 +119,63 @@ util_make_fragment_tex_shader_writemask(struct pipe_context *pipe,
ureg_TEX( ureg,
ureg_writemask(out, writemask),
- TGSI_TEXTURE_2D, tex, sampler );
+ tex_target, tex, sampler );
ureg_END( ureg );
return ureg_create_shader_and_destroy( ureg, pipe );
}
void *
-util_make_fragment_tex_shader(struct pipe_context *pipe )
+util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target )
{
return util_make_fragment_tex_shader_writemask( pipe,
+ tex_target,
TGSI_WRITEMASK_XYZW );
}
+/**
+ * Make a simple fragment texture shader which reads an X component from
+ * a texture and writes it as depth.
+ */
+void *
+util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe,
+ unsigned tex_target)
+{
+ struct ureg_program *ureg;
+ struct ureg_src sampler;
+ struct ureg_src tex;
+ struct ureg_dst out, depth;
+ struct ureg_src imm;
+ ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
+ if (ureg == NULL)
+ return NULL;
+
+ sampler = ureg_DECL_sampler( ureg, 0 );
+
+ tex = ureg_DECL_fs_input( ureg,
+ TGSI_SEMANTIC_GENERIC, 0,
+ TGSI_INTERPOLATE_PERSPECTIVE );
+
+ out = ureg_DECL_output( ureg,
+ TGSI_SEMANTIC_COLOR,
+ 0 );
+
+ depth = ureg_DECL_output( ureg,
+ TGSI_SEMANTIC_POSITION,
+ 0 );
+
+ imm = ureg_imm4f( ureg, 0, 0, 0, 1 );
+
+ ureg_MOV( ureg, out, imm );
+
+ ureg_TEX( ureg,
+ ureg_writemask(depth, TGSI_WRITEMASK_Z),
+ tex_target, tex, sampler );
+ ureg_END( ureg );
+
+ return ureg_create_shader_and_destroy( ureg, pipe );
+}
/**
* Make simple fragment color pass-through shader.
@@ -137,9 +183,18 @@ util_make_fragment_tex_shader(struct pipe_context *pipe )
void *
util_make_fragment_passthrough_shader(struct pipe_context *pipe)
{
+ return util_make_fragment_clonecolor_shader(pipe, 1);
+}
+
+void *
+util_make_fragment_clonecolor_shader(struct pipe_context *pipe, int num_cbufs)
+{
struct ureg_program *ureg;
struct ureg_src src;
- struct ureg_dst dst;
+ struct ureg_dst dst[8];
+ int i;
+
+ assert(num_cbufs <= 8);
ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
if (ureg == NULL)
@@ -148,12 +203,13 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe)
src = ureg_DECL_fs_input( ureg, TGSI_SEMANTIC_COLOR, 0,
TGSI_INTERPOLATE_PERSPECTIVE );
- dst = ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 );
+ for (i = 0; i < num_cbufs; i++)
+ dst[i] = ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, i );
+
+ for (i = 0; i < num_cbufs; i++)
+ ureg_MOV( ureg, dst[i], src );
- ureg_MOV( ureg, dst, src );
ureg_END( ureg );
return ureg_create_shader_and_destroy( ureg, pipe );
}
-
-
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h
index d2e80d6eb4d..6e760942e25 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.h
+++ b/src/gallium/auxiliary/util/u_simple_shaders.h
@@ -51,16 +51,25 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe,
extern void *
util_make_fragment_tex_shader_writemask(struct pipe_context *pipe,
- unsigned writemask );
+ unsigned tex_target,
+ unsigned writemask);
extern void *
-util_make_fragment_tex_shader(struct pipe_context *pipe);
+util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target);
+
+
+extern void *
+util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe,
+ unsigned tex_target);
extern void *
util_make_fragment_passthrough_shader(struct pipe_context *pipe);
+extern void *
+util_make_fragment_clonecolor_shader(struct pipe_context *pipe, int num_cbufs);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/util/u_texture.c b/src/gallium/auxiliary/util/u_texture.c
new file mode 100644
index 00000000000..cd477ab640f
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_texture.c
@@ -0,0 +1,102 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * Copyright 2008 VMware, Inc. All rights reserved.
+ * Copyright 2009 Marek Olšák <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture mapping utility functions.
+ *
+ * @author Brian Paul
+ * Marek Olšák
+ */
+
+#include "pipe/p_defines.h"
+
+#include "util/u_texture.h"
+
+void util_map_texcoords2d_onto_cubemap(unsigned face,
+ const float *in_st, unsigned in_stride,
+ float *out_str, unsigned out_stride)
+{
+ int i;
+ float rx, ry, rz;
+
+ /* loop over quad verts */
+ for (i = 0; i < 4; i++) {
+ /* Compute sc = +/-scale and tc = +/-scale.
+ * Not +/-1 to avoid cube face selection ambiguity near the edges,
+ * though that can still sometimes happen with this scale factor...
+ */
+ const float scale = 0.9999f;
+ const float sc = (2 * in_st[0] - 1) * scale;
+ const float tc = (2 * in_st[1] - 1) * scale;
+
+ switch (face) {
+ case PIPE_TEX_FACE_POS_X:
+ rx = 1;
+ ry = -tc;
+ rz = -sc;
+ break;
+ case PIPE_TEX_FACE_NEG_X:
+ rx = -1;
+ ry = -tc;
+ rz = sc;
+ break;
+ case PIPE_TEX_FACE_POS_Y:
+ rx = sc;
+ ry = 1;
+ rz = tc;
+ break;
+ case PIPE_TEX_FACE_NEG_Y:
+ rx = sc;
+ ry = -1;
+ rz = -tc;
+ break;
+ case PIPE_TEX_FACE_POS_Z:
+ rx = sc;
+ ry = -tc;
+ rz = 1;
+ break;
+ case PIPE_TEX_FACE_NEG_Z:
+ rx = -sc;
+ ry = -tc;
+ rz = -1;
+ break;
+ default:
+ rx = ry = rz = 0;
+ assert(0);
+ }
+
+ out_str[0] = rx; /*s*/
+ out_str[1] = ry; /*t*/
+ out_str[2] = rz; /*r*/
+
+ in_st += in_stride;
+ out_str += out_stride;
+ }
+}
diff --git a/src/gallium/auxiliary/util/u_texture.h b/src/gallium/auxiliary/util/u_texture.h
new file mode 100644
index 00000000000..93b2f1e4c97
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_texture.h
@@ -0,0 +1,54 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Marek Olšák <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_TEXTURE_H
+#define U_TEXTURE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Convert 2D texture coordinates of 4 vertices into cubemap coordinates
+ * in the given face.
+ * Coordinates must be in the range [0,1].
+ *
+ * \param face Cubemap face.
+ * \param in_st 4 pairs of 2D texture coordinates to convert.
+ * \param in_stride Stride of in_st in floats.
+ * \param out_str STR cubemap texture coordinates to compute.
+ * \param out_stride Stride of out_str in floats.
+ */
+void util_map_texcoords2d_onto_cubemap(unsigned face,
+ const float *in_st, unsigned in_stride,
+ float *out_str, unsigned out_stride);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 9c59677a741..eea6b5d6a5c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -629,7 +629,8 @@ lp_build_abs(struct lp_build_context *bld,
if(type.floating) {
/* Mask out the sign bit */
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1);
+ unsigned long absMask = ~(1 << (type.width - 1));
+ LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
a = LLVMBuildAnd(bld->builder, a, mask, "");
a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
@@ -1083,7 +1084,7 @@ lp_build_log(struct lp_build_context *bld,
LLVMValueRef x)
{
/* log(2) */
- LLVMValueRef log2 = lp_build_const_scalar(bld->type, 1.4426950408889634);
+ LLVMValueRef log2 = lp_build_const_scalar(bld->type, 0.69314718055994529);
return lp_build_mul(bld, log2, lp_build_exp2(bld, x));
}
@@ -1095,7 +1096,7 @@ lp_build_log(struct lp_build_context *bld,
/**
* Generate polynomial.
- * Ex: x^2 * coeffs[0] + x * coeffs[1] + coeffs[2].
+ * Ex: coeffs[0] + x * coeffs[1] + x^2 * coeffs[2].
*/
static LLVMValueRef
lp_build_polynomial(struct lp_build_context *bld,
@@ -1285,13 +1286,13 @@ lp_build_log2_approx(struct lp_build_context *bld,
/* mant = (float) mantissa(x) */
mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
mant = LLVMBuildOr(bld->builder, mant, one, "");
- mant = LLVMBuildSIToFP(bld->builder, mant, vec_type, "");
+ mant = LLVMBuildBitCast(bld->builder, mant, vec_type, "");
logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
Elements(lp_build_log2_polynomial));
/* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
- logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildMul(bld->builder, mant, bld->one, ""), "");
+ logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 3eb0e0c57cb..a67c70ff25a 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -763,7 +763,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
src0 = emit_fetch( bld, inst, 0, chan_index );
tmp0 = lp_build_floor(&bld->base, src0);
- tmp0 = lp_build_sub(&bld->base, tmp0, src0);
+ tmp0 = lp_build_sub(&bld->base, src0, tmp0);
dst0[chan_index] = tmp0;
}
break;
diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h
new file mode 100644
index 00000000000..74b27574942
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_debug.h
@@ -0,0 +1,71 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_DEBUG_H
+#define LP_DEBUG_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+
+extern void
+st_print_current(void);
+
+
+#define DEBUG_PIPE 0x1
+#define DEBUG_TGSI 0x2
+#define DEBUG_TEX 0x4
+#define DEBUG_ASM 0x8
+#define DEBUG_SETUP 0x10
+#define DEBUG_RAST 0x20
+#define DEBUG_QUERY 0x40
+#define DEBUG_SCREEN 0x80
+#define DEBUG_JIT 0x100
+
+#ifdef DEBUG
+extern int LP_DEBUG;
+#else
+#define LP_DEBUG 0
+#endif
+
+void st_debug_init( void );
+
+static INLINE void
+LP_DBG( unsigned flag, const char *fmt, ... )
+{
+ if (LP_DEBUG & flag)
+ {
+ va_list args;
+
+ va_start( args, fmt );
+ debug_vprintf( fmt, args );
+ va_end( args );
+ }
+}
+
+
+#endif /* LP_DEBUG_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 19fe2850fd1..9b47415f003 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -36,6 +36,24 @@
#include "lp_winsys.h"
#include "lp_jit.h"
#include "lp_screen.h"
+#include "lp_debug.h"
+
+#ifdef DEBUG
+int LP_DEBUG = 0;
+
+static const struct debug_named_value lp_debug_flags[] = {
+ { "pipe", DEBUG_PIPE },
+ { "tgsi", DEBUG_TGSI },
+ { "tex", DEBUG_TEX },
+ { "asm", DEBUG_ASM },
+ { "setup", DEBUG_SETUP },
+ { "rast", DEBUG_RAST },
+ { "query", DEBUG_QUERY },
+ { "screen", DEBUG_SCREEN },
+ { "jit", DEBUG_JIT },
+ {NULL, 0}
+};
+#endif
static const char *
@@ -259,6 +277,10 @@ llvmpipe_create_screen(struct llvmpipe_winsys *winsys)
{
struct llvmpipe_screen *screen = CALLOC_STRUCT(llvmpipe_screen);
+#ifdef DEBUG
+ LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 );
+#endif
+
if (!screen)
return NULL;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index ee0f69b2af9..22683ff8b42 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -87,6 +87,7 @@
#include "lp_state.h"
#include "lp_quad.h"
#include "lp_tex_sample.h"
+#include "lp_debug.h"
static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
@@ -408,59 +409,58 @@ generate_fragment(struct llvmpipe_context *lp,
unsigned i;
unsigned chan;
-#ifdef DEBUG
- tgsi_dump(shader->base.tokens, 0);
- if(key->depth.enabled) {
- debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
- debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
- debug_printf("depth.writemask = %u\n", key->depth.writemask);
- }
- if(key->alpha.enabled) {
- debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
- debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
- }
- if(key->blend.logicop_enable) {
- debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func);
- }
- else if(key->blend.blend_enable) {
- debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE));
- debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE));
- debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE));
- debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE));
- debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE));
- debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
- }
- debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
- for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
- if(key->sampler[i].format) {
- debug_printf("sampler[%u] = \n", i);
- debug_printf(" .format = %s\n",
- pf_name(key->sampler[i].format));
- debug_printf(" .target = %s\n",
- debug_dump_tex_target(key->sampler[i].target, TRUE));
- debug_printf(" .pot = %u %u %u\n",
- key->sampler[i].pot_width,
- key->sampler[i].pot_height,
- key->sampler[i].pot_depth);
- debug_printf(" .wrap = %s %s %s\n",
- debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
- debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
- debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
- debug_printf(" .min_img_filter = %s\n",
- debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
- debug_printf(" .min_mip_filter = %s\n",
- debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
- debug_printf(" .mag_img_filter = %s\n",
- debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
- if(key->sampler[i].compare_mode)
- debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
- debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords);
- debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter);
+ if (LP_DEBUG & DEBUG_JIT) {
+ tgsi_dump(shader->base.tokens, 0);
+ if(key->depth.enabled) {
+ debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
+ debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
+ debug_printf("depth.writemask = %u\n", key->depth.writemask);
+ }
+ if(key->alpha.enabled) {
+ debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
+ debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
+ }
+ if(key->blend.logicop_enable) {
+ debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func);
+ }
+ else if(key->blend.blend_enable) {
+ debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE));
+ debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE));
+ debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE));
+ debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE));
+ debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE));
+ debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
+ }
+ debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
+ for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
+ if(key->sampler[i].format) {
+ debug_printf("sampler[%u] = \n", i);
+ debug_printf(" .format = %s\n",
+ pf_name(key->sampler[i].format));
+ debug_printf(" .target = %s\n",
+ debug_dump_tex_target(key->sampler[i].target, TRUE));
+ debug_printf(" .pot = %u %u %u\n",
+ key->sampler[i].pot_width,
+ key->sampler[i].pot_height,
+ key->sampler[i].pot_depth);
+ debug_printf(" .wrap = %s %s %s\n",
+ debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
+ debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
+ debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
+ debug_printf(" .min_img_filter = %s\n",
+ debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
+ debug_printf(" .min_mip_filter = %s\n",
+ debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
+ debug_printf(" .mag_img_filter = %s\n",
+ debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
+ if(key->sampler[i].compare_mode)
+ debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
+ debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords);
+ debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter);
+ }
}
}
-#endif
-
variant = CALLOC_STRUCT(lp_fragment_shader_variant);
if(!variant)
return NULL;
@@ -599,8 +599,8 @@ generate_fragment(struct llvmpipe_context *lp,
}
lp_build_conv_mask(builder, fs_type, blend_type,
- fs_mask, num_fs,
- &blend_mask, 1);
+ fs_mask, num_fs,
+ &blend_mask, 1);
/*
* Blending.
@@ -631,16 +631,15 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMRunFunctionPassManager(screen->pass, variant->function);
-#ifdef DEBUG
- LLVMDumpValue(variant->function);
- debug_printf("\n");
-#endif
+ if (LP_DEBUG & DEBUG_JIT) {
+ LLVMDumpValue(variant->function);
+ debug_printf("\n");
+ }
variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function);
-#ifdef DEBUG
- lp_disassemble(variant->jit_function);
-#endif
+ if (LP_DEBUG & DEBUG_ASM)
+ lp_disassemble(variant->jit_function);
variant->next = shader->variants;
shader->variants = variant;
diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h
index 62990f9b6a6..9aee9e49566 100644
--- a/src/gallium/drivers/nouveau/nouveau_stateobj.h
+++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h
@@ -112,20 +112,30 @@ so_emit(struct nouveau_channel *chan, struct nouveau_stateobj *so)
{
struct nouveau_pushbuf *pb = chan->pushbuf;
unsigned nr, i;
+ int ret = 0;
nr = so->cur - so->push;
- if (pb->remaining < nr)
- nouveau_pushbuf_flush(chan, nr);
+ /* This will flush if we need space.
+ * We don't actually need the marker.
+ */
+ if ((ret = nouveau_pushbuf_marker_emit(chan, nr, so->cur_reloc))) {
+ debug_printf("so_emit failed marker emit with error %d\n", ret);
+ return;
+ }
pb->remaining -= nr;
memcpy(pb->cur, so->push, nr * 4);
for (i = 0; i < so->cur_reloc; i++) {
struct nouveau_stateobj_reloc *r = &so->reloc[i];
- nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset,
+ if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset,
r->bo, r->data, 0, r->flags,
- r->vor, r->tor);
+ r->vor, r->tor))) {
+ debug_printf("so_emit failed reloc with error %d\n", ret);
+ goto out;
+ }
}
+out:
pb->cur += nr;
}
@@ -134,26 +144,45 @@ so_emit_reloc_markers(struct nouveau_channel *chan, struct nouveau_stateobj *so)
{
struct nouveau_pushbuf *pb = chan->pushbuf;
unsigned i;
+ int ret = 0;
if (!so)
return;
i = so->cur_reloc << 1;
- if (pb->remaining < i)
- nouveau_pushbuf_flush(chan, i);
+ /* This will flush if we need space.
+ * We don't actually need the marker.
+ */
+ if ((ret = nouveau_pushbuf_marker_emit(chan, i, i))) {
+ debug_printf("so_emit_reloc_markers failed marker emit with" \
+ "error %d\n", ret);
+ return;
+ }
pb->remaining -= i;
for (i = 0; i < so->cur_reloc; i++) {
struct nouveau_stateobj_reloc *r = &so->reloc[i];
- nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, r->packet, 0,
+ if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo,
+ r->packet, 0,
(r->flags & (NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART |
NOUVEAU_BO_RDWR)) |
- NOUVEAU_BO_DUMMY, 0, 0);
- nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, r->data, 0,
+ NOUVEAU_BO_DUMMY, 0, 0))) {
+ debug_printf("so_emit_reloc_markers failed reloc" \
+ "with error %d\n", ret);
+ pb->remaining += ((so->cur_reloc - i) << 1);
+ return;
+ }
+ if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo,
+ r->data, 0,
r->flags | NOUVEAU_BO_DUMMY,
- r->vor, r->tor);
+ r->vor, r->tor))) {
+ debug_printf("so_emit_reloc_markers failed reloc" \
+ "with error %d\n", ret);
+ pb->remaining += ((so->cur_reloc - i) << 1) - 1;
+ return;
+ }
}
}
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index 932893eef59..3020806c5d6 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -133,6 +133,9 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
assert(sub_w == w || util_is_pot(sub_w));
assert(sub_h == h || util_is_pot(sub_h));
+ MARK_RING (chan, 8 + ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*17, 2 +
+ ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*2);
+
BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
OUT_RELOCo(chan, dst_bo,
NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
@@ -202,7 +205,7 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
unsigned src_offset = src->offset + sy * src_pitch +
sx * pf_get_blocksize(src->texture->format);
- WAIT_RING (chan, 3 + ((h / 2047) + 1) * 9);
+ MARK_RING (chan, 3 + ((h / 2047) + 1) * 9, 2 + ((h / 2047) + 1) * 2);
BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
OUT_RELOCo(chan, src_bo,
NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
@@ -250,7 +253,7 @@ nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
if (format < 0)
return 1;
- WAIT_RING (chan, 12);
+ MARK_RING (chan, 12, 4);
BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
@@ -315,7 +318,7 @@ nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
gdirect_format = nv04_rect_format(dst->format);
assert(gdirect_format >= 0);
- WAIT_RING (chan, 16);
+ MARK_RING (chan, 16, 4);
BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c
index d8300fd69f6..46a821a48b1 100644
--- a/src/gallium/drivers/nv30/nv30_context.c
+++ b/src/gallium/drivers/nv30/nv30_context.c
@@ -58,6 +58,9 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv30->pipe.is_texture_referenced = nouveau_is_texture_referenced;
nv30->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
+ screen->base.channel->user_private = nv30;
+ screen->base.channel->flush_notify = nv30_state_flush_notify;
+
nv30_init_query_functions(nv30);
nv30_init_surface_functions(nv30);
nv30_init_state_functions(nv30);
diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
index 8d49366dfcb..6f44b1c7fe1 100644
--- a/src/gallium/drivers/nv30/nv30_context.h
+++ b/src/gallium/drivers/nv30/nv30_context.h
@@ -184,6 +184,7 @@ extern void nv30_fragtex_bind(struct nv30_context *);
/* nv30_state.c and friends */
extern boolean nv30_state_validate(struct nv30_context *nv30);
extern void nv30_state_emit(struct nv30_context *nv30);
+extern void nv30_state_flush_notify(struct nouveau_channel *chan);
extern struct nv30_state_entry nv30_state_rasterizer;
extern struct nv30_state_entry nv30_state_scissor;
extern struct nv30_state_entry nv30_state_stipple;
diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c
index 621b8846c8e..ac52d946f02 100644
--- a/src/gallium/drivers/nv30/nv30_state_emit.c
+++ b/src/gallium/drivers/nv30/nv30_state_emit.c
@@ -41,7 +41,7 @@ nv30_state_emit(struct nv30_context *nv30)
struct nouveau_channel *chan = nv30->screen->base.channel;
struct nv30_state *state = &nv30->state;
struct nv30_screen *screen = nv30->screen;
- unsigned i, samplers;
+ unsigned i;
uint64_t states;
if (nv30->pctx_id != screen->cur_pctx) {
@@ -63,6 +63,14 @@ nv30_state_emit(struct nv30_context *nv30)
}
state->dirty = 0;
+}
+
+void
+nv30_state_flush_notify(struct nouveau_channel *chan)
+{
+ struct nv30_context *nv30 = chan->user_private;
+ struct nv30_state *state = &nv30->state;
+ unsigned i, samplers;
so_emit_reloc_markers(chan, state->hw[NV30_STATE_FB]);
for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c
index 7f008274a4e..eb9cce4c786 100644
--- a/src/gallium/drivers/nv40/nv40_context.c
+++ b/src/gallium/drivers/nv40/nv40_context.c
@@ -58,6 +58,9 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv40->pipe.is_texture_referenced = nouveau_is_texture_referenced;
nv40->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
+ screen->base.channel->user_private = nv40;
+ screen->base.channel->flush_notify = nv40_state_flush_notify;
+
nv40_init_query_functions(nv40);
nv40_init_surface_functions(nv40);
nv40_init_state_functions(nv40);
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
index a3d594167aa..cf33b64a86d 100644
--- a/src/gallium/drivers/nv40/nv40_context.h
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -204,6 +204,7 @@ extern void nv40_fragtex_bind(struct nv40_context *);
extern boolean nv40_state_validate(struct nv40_context *nv40);
extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40);
extern void nv40_state_emit(struct nv40_context *nv40);
+extern void nv40_state_flush_notify(struct nouveau_channel *chan);
extern struct nv40_state_entry nv40_state_rasterizer;
extern struct nv40_state_entry nv40_state_scissor;
extern struct nv40_state_entry nv40_state_stipple;
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
index 198692965dc..ba0fbcb26a9 100644
--- a/src/gallium/drivers/nv40/nv40_state_emit.c
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -57,7 +57,7 @@ nv40_state_emit(struct nv40_context *nv40)
struct nouveau_channel *chan = nv40->screen->base.channel;
struct nv40_state *state = &nv40->state;
struct nv40_screen *screen = nv40->screen;
- unsigned i, samplers;
+ unsigned i;
uint64_t states;
if (nv40->pctx_id != screen->cur_pctx) {
@@ -87,6 +87,14 @@ nv40_state_emit(struct nv40_context *nv40)
}
state->dirty = 0;
+}
+
+void
+nv40_state_flush_notify(struct nouveau_channel *chan)
+{
+ struct nv40_context *nv40 = chan->user_private;
+ struct nv40_state *state = &nv40->state;
+ unsigned i, samplers;
so_emit_reloc_markers(chan, state->hw[NV40_STATE_FB]);
for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 79135f2f362..5578a5838fb 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -126,7 +126,7 @@ struct nv50_state {
unsigned viewport_bypass;
struct nouveau_stateobj *tsc_upload;
struct nouveau_stateobj *tic_upload;
- unsigned miptree_nr;
+ unsigned miptree_nr[PIPE_SHADER_TYPES];
struct nouveau_stateobj *vertprog;
struct nouveau_stateobj *fragprog;
struct nouveau_stateobj *programs;
@@ -162,10 +162,10 @@ struct nv50_context {
unsigned vtxbuf_nr;
struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
unsigned vtxelt_nr;
- struct nv50_sampler_stateobj *sampler[PIPE_MAX_SAMPLERS];
- unsigned sampler_nr;
- struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS];
- unsigned miptree_nr;
+ struct nv50_sampler_stateobj *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+ unsigned sampler_nr[PIPE_SHADER_TYPES];
+ struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+ unsigned miptree_nr[PIPE_SHADER_TYPES];
uint16_t vbo_fifo;
};
@@ -218,7 +218,7 @@ extern void nv50_state_flush_notify(struct nouveau_channel *chan);
extern void nv50_so_init_sifc(struct nv50_context *nv50,
struct nouveau_stateobj *so,
struct nouveau_bo *bo, unsigned reloc,
- unsigned size);
+ unsigned offset, unsigned size);
/* nv50_tex.c */
extern void nv50_tex_validate(struct nv50_context *);
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 40ee6659993..9e083b662dd 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -55,6 +55,20 @@ get_tile_mode(unsigned ny, unsigned d)
return tile_mode | 0x10;
}
+static INLINE unsigned
+get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned nb_h)
+{
+ unsigned tile_h = get_tile_height(tile_mode);
+ unsigned tile_d = get_tile_depth(tile_mode);
+
+ /* pitch_2d == to next slice within this volume-tile */
+ /* pitch_3d == size (in bytes) of a volume-tile */
+ unsigned pitch_2d = tile_h * 64;
+ unsigned pitch_3d = tile_d * align(nb_h, tile_h) * pitch;
+
+ return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d;
+}
+
static struct pipe_texture *
nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
{
@@ -130,6 +144,8 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
mt->level[0].tile_mode, tile_flags,
&mt->base.bo);
if (ret) {
+ for (l = 0; l < pt->last_level; ++l)
+ FREE(mt->level[l].image_offset);
FREE(mt);
return NULL;
}
@@ -169,6 +185,10 @@ static void
nv50_miptree_destroy(struct pipe_texture *pt)
{
struct nv50_miptree *mt = nv50_miptree(pt);
+ unsigned l;
+
+ for (l = 0; l < pt->last_level; ++l)
+ FREE(mt->level[l].image_offset);
nouveau_bo_ref(NULL, &mt->base.bo);
FREE(mt);
@@ -182,15 +202,10 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
struct nv50_miptree *mt = nv50_miptree(pt);
struct nv50_miptree_level *lvl = &mt->level[level];
struct pipe_surface *ps;
- int img;
+ unsigned img = 0;
if (pt->target == PIPE_TEXTURE_CUBE)
img = face;
- else
- if (pt->target == PIPE_TEXTURE_3D)
- img = zslice;
- else
- img = 0;
ps = CALLOC_STRUCT(pipe_surface);
if (!ps)
@@ -206,6 +221,12 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
ps->zslice = zslice;
ps->offset = lvl->image_offset[img];
+ if (pt->target == PIPE_TEXTURE_3D) {
+ unsigned nb_h = pf_get_nblocksy(pt->format, ps->height);
+ ps->offset += get_zslice_offset(lvl->tile_mode, zslice,
+ lvl->pitch, nb_h);
+ }
+
return ps;
}
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index f0fe7e61684..e496cf4cad8 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -119,7 +119,7 @@ struct nv50_pc {
struct nv50_reg *param;
int param_nr;
struct nv50_reg *immd;
- float *immd_buf;
+ uint32_t *immd_buf;
int immd_nr;
struct nv50_reg **addr;
int addr_nr;
@@ -131,6 +131,9 @@ struct nv50_pc {
struct nv50_reg *r_brdc;
struct nv50_reg *r_dst[4];
+ struct nv50_reg reg_instances[16];
+ unsigned reg_instance_nr;
+
unsigned interp_mode[32];
/* perspective interpolation registers */
struct nv50_reg *iv_p;
@@ -150,6 +153,20 @@ struct nv50_pc {
boolean allow32;
};
+static INLINE struct nv50_reg *
+reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
+{
+ struct nv50_reg *ri;
+
+ assert(pc->reg_instance_nr < 16);
+ ri = &pc->reg_instances[pc->reg_instance_nr++];
+ if (reg) {
+ *ri = *reg;
+ reg->mod = 0;
+ }
+ return ri;
+}
+
static INLINE void
ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
{
@@ -342,25 +359,34 @@ static void
kill_temp_temp(struct nv50_pc *pc)
{
int i;
-
+
for (i = 0; i < pc->temp_temp_nr; i++)
free_temp(pc, pc->temp_temp[i]);
pc->temp_temp_nr = 0;
}
static int
-ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
+ctor_immd_4u32(struct nv50_pc *pc,
+ uint32_t x, uint32_t y, uint32_t z, uint32_t w)
{
- pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(float)),
- (pc->immd_nr + 1) * 4 * sizeof(float));
+ unsigned size = pc->immd_nr * 4 * sizeof(uint32_t);
+
+ pc->immd_buf = REALLOC(pc->immd_buf, size, size + 4 * sizeof(uint32_t));
+
pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
-
+
return pc->immd_nr++;
}
+static INLINE int
+ctor_immd_4f32(struct nv50_pc *pc, float x, float y, float z, float w)
+{
+ return ctor_immd_4u32(pc, fui(x), fui(y), fui(z), fui(w));
+}
+
static struct nv50_reg *
alloc_immd(struct nv50_pc *pc, float f)
{
@@ -368,11 +394,11 @@ alloc_immd(struct nv50_pc *pc, float f)
unsigned hw;
for (hw = 0; hw < pc->immd_nr * 4; hw++)
- if (pc->immd_buf[hw] == f)
+ if (pc->immd_buf[hw] == fui(f))
break;
if (hw == pc->immd_nr * 4)
- hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4;
+ hw = ctor_immd_4f32(pc, f, -f, 0.5 * f, 0) * 4;
ctor_reg(r, P_IMMD, -1, hw);
return r;
@@ -464,22 +490,24 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
static INLINE void
set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
{
- unsigned val;
- float f = pc->immd_buf[imm->hw];
+ union {
+ float f;
+ uint32_t ui;
+ } u;
+ u.ui = pc->immd_buf[imm->hw];
- if (imm->mod & NV50_MOD_ABS)
- f = fabsf(f);
- val = fui((imm->mod & NV50_MOD_NEG) ? -f : f);
+ u.f = (imm->mod & NV50_MOD_ABS) ? fabsf(u.f) : u.f;
+ u.f = (imm->mod & NV50_MOD_NEG) ? -u.f : u.f;
set_long(pc, e);
- /*XXX: can't be predicated - bits overlap.. catch cases where both
- * are required and avoid them. */
+ /* XXX: can't be predicated - bits overlap; cases where both
+ * are required should be avoided by using pc->allow32 */
set_pred(pc, 0, 0, e);
set_pred_wr(pc, 0, 0, e);
e->inst[1] |= 0x00000002 | 0x00000001;
- e->inst[0] |= (val & 0x3f) << 16;
- e->inst[1] |= (val >> 6) << 2;
+ e->inst[0] |= (u.ui & 0x3f) << 16;
+ e->inst[1] |= (u.ui >> 6) << 2;
}
static INLINE void
@@ -644,7 +672,7 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
if (src->type == P_IMMD || src->type == P_CONST) {
set_long(pc, e);
set_data(pc, src, 0x7f, 9, e);
- e->inst[1] |= 0x20000000; /* src0 const? */
+ e->inst[1] |= 0x20000000; /* mov from c[] */
} else {
if (src->type == P_ATTR) {
set_long(pc, e);
@@ -659,9 +687,9 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
if (is_long(e) && !is_immd(e)) {
e->inst[1] |= 0x04000000; /* 32-bit */
- e->inst[1] |= 0x0000c000; /* "subsubop" 0x3 */
+ e->inst[1] |= 0x0000c000; /* 32-bit c[] load / lane mask 0:1 */
if (!(e->inst[1] & 0x20000000))
- e->inst[1] |= 0x00030000; /* "subsubop" 0xf */
+ e->inst[1] |= 0x00030000; /* lane mask 2:3 */
} else
e->inst[0] |= 0x00008000;
@@ -676,6 +704,17 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
FREE(imm);
}
+static void
+emit_nop(struct nv50_pc *pc)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xf0000000;
+ set_long(pc, e);
+ e->inst[1] = 0xe0000000;
+ emit(pc, e);
+}
+
static boolean
check_swap_src_0_1(struct nv50_pc *pc,
struct nv50_reg **s0, struct nv50_reg **s1)
@@ -795,6 +834,33 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
}
static void
+emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ assert(dst->type == P_TEMP);
+ e->inst[1] = 0x20000000 | (pred << 12);
+ set_long(pc, e);
+ set_dst(pc, dst, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_mov_to_pred(struct nv50_pc *pc, int pred, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0x000001fc;
+ e->inst[1] = 0xa0000008;
+ set_long(pc, e);
+ set_pred_wr(pc, 1, pred, e);
+ set_src_0_restricted(pc, src, e);
+
+ emit(pc, e);
+}
+
+static void
emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1)
{
@@ -898,7 +964,6 @@ static INLINE void
emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1)
{
- assert(src0 != src1);
src1->mod ^= NV50_MOD_NEG;
emit_add(pc, dst, src0, src1);
src1->mod ^= NV50_MOD_NEG;
@@ -967,7 +1032,6 @@ static INLINE void
emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1, struct nv50_reg *src2)
{
- assert(src2 != src0 && src2 != src1);
src2->mod ^= NV50_MOD_NEG;
emit_mad(pc, dst, src0, src1, src2);
src2->mod ^= NV50_MOD_NEG;
@@ -1257,9 +1321,68 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
emit(pc, e);
}
+static struct nv50_program_exec *
+emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
+ struct nv50_program_exec **join)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ if (join) {
+ set_long(pc, e);
+ e->inst[0] |= 0xa0000002;
+ emit(pc, e);
+ *join = e;
+ e = exec(pc);
+ }
+
+ set_long(pc, e);
+ e->inst[0] |= 0x10000002;
+ if (pred >= 0)
+ set_pred(pc, cc, pred, e);
+ emit(pc, e);
+ return pc->p->exec_tail;
+}
+
+#define QOP_ADD 0
+#define QOP_SUBR 1
+#define QOP_SUB 2
+#define QOP_MOV_SRC1 3
+
+/* For a quad of threads / top left, top right, bottom left, bottom right
+ * pixels, do a different operation, and take src0 from a specific thread.
+ */
+static void
+emit_quadop(struct nv50_pc *pc, struct nv50_reg *dst, int wp, int lane_src0,
+ struct nv50_reg *src0, struct nv50_reg *src1, ubyte qop)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xc0000000;
+ e->inst[1] = 0x80000000;
+ set_long(pc, e);
+ e->inst[0] |= lane_src0 << 16;
+ set_src_0(pc, src0, e);
+ set_src_2(pc, src1, e);
+
+ if (wp >= 0)
+ set_pred_wr(pc, 1, wp, e);
+
+ if (dst)
+ set_dst(pc, dst, e);
+ else {
+ e->inst[0] |= 0x000001fc;
+ e->inst[1] |= 0x00000008;
+ }
+
+ e->inst[0] |= (qop & 3) << 20;
+ e->inst[1] |= (qop >> 2) << 22;
+
+ emit(pc, e);
+}
+
static void
load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
- struct nv50_reg **src, boolean proj)
+ struct nv50_reg **src, unsigned arg, boolean proj)
{
int mod[3] = { src[0]->mod, src[1]->mod, src[2]->mod };
@@ -1276,6 +1399,10 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
if (proj && 0 /* looks more correct without this */)
emit_mul(pc, t[2], t[2], src[3]);
+ else
+ if (arg == 4) /* there is no textureProj(samplerCubeShadow) */
+ emit_mov(pc, t[3], src[3]);
+
emit_flop(pc, 0, t[2], t[2]);
emit_mul(pc, t[0], src[0], t[2]);
@@ -1284,89 +1411,214 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
}
static void
-emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
- struct nv50_reg **src, unsigned unit, unsigned type, boolean proj)
+load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
+ struct nv50_reg **src, unsigned dim, unsigned arg)
{
- struct nv50_reg *t[4];
- struct nv50_program_exec *e;
+ unsigned c, mode;
+
+ if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
+ mode = pc->interp_mode[src[0]->index] | INTERP_PERSPECTIVE;
- unsigned c, mode, dim;
+ t[3]->rhw = src[3]->rhw;
+ emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
+ emit_flop(pc, 0, t[3], t[3]);
+ for (c = 0; c < dim; ++c) {
+ t[c]->rhw = src[c]->rhw;
+ emit_interp(pc, t[c], t[3], mode);
+ }
+ if (arg != dim) { /* depth reference value */
+ t[dim]->rhw = src[2]->rhw;
+ emit_interp(pc, t[dim], t[3], mode);
+ }
+ } else {
+ /* XXX: for some reason the blob sometimes uses MAD
+ * (mad f32 $rX $rY $rZ neg $r63)
+ */
+ emit_flop(pc, 0, t[3], src[3]);
+ for (c = 0; c < dim; ++c)
+ emit_mul(pc, t[c], src[c], t[3]);
+ if (arg != dim) /* depth reference value */
+ emit_mul(pc, t[dim], src[2], t[3]);
+ }
+}
+
+static INLINE void
+get_tex_dim(unsigned type, unsigned *dim, unsigned *arg)
+{
switch (type) {
case TGSI_TEXTURE_1D:
- dim = 1;
+ *arg = *dim = 1;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ *dim = 1;
+ *arg = 2;
break;
case TGSI_TEXTURE_UNKNOWN:
case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_SHADOW1D: /* XXX: x, z */
case TGSI_TEXTURE_RECT:
- dim = 2;
+ *arg = *dim = 2;
+ break;
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ *dim = 2;
+ *arg = 3;
break;
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_SHADOWRECT: /* XXX */
- dim = 3;
+ *dim = *arg = 3;
break;
default:
assert(0);
break;
}
+}
- /* some cards need t[0]'s hw index to be a multiple of 4 */
- alloc_temp4(pc, t, 0);
+/* We shouldn't execute TEXLOD if any of the pixels in a quad have
+ * different LOD values, so branch off groups of equal LOD.
+ */
+static void
+emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod,
+ struct nv50_reg *src, struct nv50_program_exec *tex)
+{
+ struct nv50_program_exec *join_at;
+ unsigned i, target = pc->p->exec_size + 7 * 2;
- if (type == TGSI_TEXTURE_CUBE) {
- load_cube_tex_coords(pc, t, src, proj);
- } else
- if (proj) {
- if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
- mode = pc->interp_mode[src[0]->index];
-
- t[3]->rhw = src[3]->rhw;
- emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
- emit_flop(pc, 0, t[3], t[3]);
-
- for (c = 0; c < dim; c++) {
- t[c]->rhw = src[c]->rhw;
- emit_interp(pc, t[c], t[3],
- (mode | INTERP_PERSPECTIVE));
- }
- } else {
- emit_flop(pc, 0, t[3], src[3]);
- for (c = 0; c < dim; c++)
- emit_mul(pc, t[c], src[c], t[3]);
+ /* Subtract lod of each pixel from lod of top left pixel, jump
+ * texlod insn if result is 0, then repeat for 2 other pixels.
+ */
+ emit_quadop(pc, NULL, 0, 0, tlod, tlod, 0x55);
+ emit_branch(pc, 0, 2, &join_at)->param.index = target;
- /* XXX: for some reason the blob sometimes uses MAD:
- * emit_mad(pc, t[c], src[0][c], t[3], t[3])
- * pc->p->exec_tail->inst[1] |= 0x080fc000;
- */
+ for (i = 1; i < 4; ++i) {
+ emit_quadop(pc, NULL, 0, i, tlod, tlod, 0x55);
+ emit_branch(pc, 0, 2, NULL)->param.index = target;
+ }
+
+ emit_mov(pc, tlod, src); /* target */
+ emit(pc, tex); /* texlod */
+
+ join_at->param.index = target + 2 * 2;
+ emit_nop(pc);
+ pc->p->exec_tail->inst[1] |= 2; /* join _after_ tex */
+}
+
+static void
+emit_texbias_sequence(struct nv50_pc *pc, struct nv50_reg *t[4], unsigned arg,
+ struct nv50_program_exec *tex)
+{
+ struct nv50_program_exec *e;
+ struct nv50_reg imm_1248, *t123[4][4], *r_bits = alloc_temp(pc, NULL);
+ int r_pred = 0;
+ unsigned n, c, i, cc[4] = { 0x0a, 0x13, 0x11, 0x10 };
+
+ pc->allow32 = FALSE;
+ ctor_reg(&imm_1248, P_IMMD, -1, ctor_immd_4u32(pc, 1, 2, 4, 8) * 4);
+
+ /* Subtract bias value of thread i from bias values of each thread,
+ * store result in r_pred, and set bit i in r_bits if result was 0.
+ */
+ assert(arg < 4);
+ for (i = 0; i < 4; ++i, ++imm_1248.hw) {
+ emit_quadop(pc, NULL, r_pred, i, t[arg], t[arg], 0x55);
+ emit_mov(pc, r_bits, &imm_1248);
+ set_pred(pc, 2, r_pred, pc->p->exec_tail);
+ }
+ emit_mov_to_pred(pc, r_pred, r_bits);
+
+ /* The lanes of a quad are now grouped by the bit in r_pred they have
+ * set. Put the input values for TEX into a new register set for each
+ * group and execute TEX only for a specific group.
+ * We cannot use the same register set for each group because we need
+ * the derivatives, which are implicitly calculated, to be correct.
+ */
+ for (i = 1; i < 4; ++i) {
+ alloc_temp4(pc, t123[i], 0);
+
+ for (c = 0; c <= arg; ++c)
+ emit_mov(pc, t123[i][c], t[c]);
+
+ *(e = exec(pc)) = *(tex);
+ e->inst[0] &= ~0x01fc;
+ set_dst(pc, t123[i][0], e);
+ set_pred(pc, cc[i], r_pred, e);
+ emit(pc, e);
+ }
+ /* finally TEX on the original regs (where we kept the input) */
+ set_pred(pc, cc[0], r_pred, tex);
+ emit(pc, tex);
+
+ /* put the 3 * n other results into regs for lane 0 */
+ n = popcnt4(((e->inst[0] >> 25) & 0x3) | ((e->inst[1] >> 12) & 0xc));
+ for (i = 1; i < 4; ++i) {
+ for (c = 0; c < n; ++c) {
+ emit_mov(pc, t[c], t123[i][c]);
+ set_pred(pc, cc[i], r_pred, pc->p->exec_tail);
}
- } else {
- for (c = 0; c < dim; c++)
- emit_mov(pc, t[c], src[c]);
+ free_temp4(pc, t123[i]);
}
+ emit_nop(pc);
+ free_temp(pc, r_bits);
+}
+
+static void
+emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
+ struct nv50_reg **src, unsigned unit, unsigned type,
+ boolean proj, int bias_lod)
+{
+ struct nv50_reg *t[4];
+ struct nv50_program_exec *e;
+ unsigned c, dim, arg;
+
+ /* t[i] must be within a single 128 bit super-reg */
+ alloc_temp4(pc, t, 0);
+
e = exec(pc);
+ e->inst[0] = 0xf0000000;
set_long(pc, e);
- e->inst[0] |= 0xf0000000;
- e->inst[1] |= 0x00000004;
set_dst(pc, t[0], e);
- e->inst[0] |= (unit << 9);
- if (dim == 2)
- e->inst[0] |= 0x00400000;
- else
- if (dim == 3) {
- e->inst[0] |= 0x00800000;
- if (type == TGSI_TEXTURE_CUBE)
- e->inst[0] |= 0x08000000;
+ /* TIC and TSC binding indices (TSC is ignored as TSC_LINKED = TRUE): */
+ e->inst[0] |= (unit << 9) /* | (unit << 17) */;
+
+ /* live flag (don't set if TEX results affect input to another TEX): */
+ /* e->inst[0] |= 0x00000004; */
+
+ get_tex_dim(type, &dim, &arg);
+
+ if (type == TGSI_TEXTURE_CUBE) {
+ e->inst[0] |= 0x08000000;
+ load_cube_tex_coords(pc, t, src, arg, proj);
+ } else
+ if (proj)
+ load_proj_tex_coords(pc, t, src, dim, arg);
+ else {
+ for (c = 0; c < dim; c++)
+ emit_mov(pc, t[c], src[c]);
+ if (arg != dim) /* depth reference value (always src.z here) */
+ emit_mov(pc, t[dim], src[2]);
}
e->inst[0] |= (mask & 0x3) << 25;
e->inst[1] |= (mask & 0xc) << 12;
- emit(pc, e);
+ if (!bias_lod) {
+ e->inst[0] |= (arg - 1) << 22;
+ emit(pc, e);
+ } else
+ if (bias_lod < 0) {
+ e->inst[0] |= arg << 22;
+ e->inst[1] |= 0x20000000; /* texbias */
+ emit_mov(pc, t[arg], src[3]);
+ emit_texbias_sequence(pc, t, arg, e);
+ } else {
+ e->inst[0] |= arg << 22;
+ e->inst[1] |= 0x40000000; /* texlod */
+ emit_mov(pc, t[arg], src[3]);
+ emit_texlod_sequence(pc, t[arg], src[3], e);
+ }
+
#if 1
c = 0;
if (mask & 1) emit_mov(pc, dst[0], t[c++]);
@@ -1389,38 +1641,6 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
}
static void
-emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
- struct nv50_program_exec **join)
-{
- struct nv50_program_exec *e = exec(pc);
-
- if (join) {
- set_long(pc, e);
- e->inst[0] |= 0xa0000002;
- emit(pc, e);
- *join = e;
- e = exec(pc);
- }
-
- set_long(pc, e);
- e->inst[0] |= 0x10000002;
- if (pred >= 0)
- set_pred(pc, cc, pred, e);
- emit(pc, e);
-}
-
-static void
-emit_nop(struct nv50_pc *pc)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0xf0000000;
- set_long(pc, e);
- e->inst[1] = 0xe0000000;
- emit(pc, e);
-}
-
-static void
emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
struct nv50_program_exec *e = exec(pc);
@@ -1515,8 +1735,6 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
static boolean
negate_supported(const struct tgsi_full_instruction *insn, int i)
{
- int s;
-
switch (insn->Instruction.Opcode) {
case TGSI_OPCODE_DDY:
case TGSI_OPCODE_DP3:
@@ -1526,29 +1744,14 @@ negate_supported(const struct tgsi_full_instruction *insn, int i)
case TGSI_OPCODE_ADD:
case TGSI_OPCODE_SUB:
case TGSI_OPCODE_MAD:
- break;
+ return TRUE;
case TGSI_OPCODE_POW:
if (i == 1)
- break;
+ return TRUE;
return FALSE;
default:
return FALSE;
}
-
- /* Watch out for possible multiple uses of an nv50_reg, we
- * can't use nv50_reg::neg in these cases.
- */
- for (s = 0; s < insn->Instruction.NumSrcRegs; ++s) {
- if (s == i)
- continue;
- if ((insn->Src[s].Register.Index ==
- insn->Src[i].Register.Index) &&
- (insn->Src[s].Register.File ==
- insn->Src[i].Register.File))
- return FALSE;
- }
-
- return TRUE;
}
/* Return a read mask for source registers deduced from opcode & write mask. */
@@ -1576,9 +1779,13 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
case TGSI_OPCODE_RSQ:
case TGSI_OPCODE_SCS:
return 0x1;
+ case TGSI_OPCODE_IF:
+ return 0x1;
case TGSI_OPCODE_LIT:
return 0xb;
case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
{
const struct tgsi_instruction_texture *tex;
@@ -1587,13 +1794,17 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
tex = &insn->Texture;
mask = 0x7;
- if (insn->Instruction.Opcode == TGSI_OPCODE_TXP)
- mask |= 0x8;
+ if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
+ insn->Instruction.Opcode != TGSI_OPCODE_TXD)
+ mask |= 0x8; /* bias, lod or proj */
switch (tex->Texture) {
case TGSI_TEXTURE_1D:
mask &= 0x9;
break;
+ case TGSI_TEXTURE_SHADOW1D:
+ mask &= 0x5;
+ break;
case TGSI_TEXTURE_2D:
mask &= 0xb;
break;
@@ -1676,7 +1887,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
/* Indicate indirection by setting r->acc < 0 and
* use the index field to select the address reg.
*/
- r = MALLOC_STRUCT(nv50_reg);
+ r = reg_instance(pc, NULL);
swz = tgsi_util_get_src_register_swizzle(
&src->Indirect, 0);
ctor_reg(r, P_CONST,
@@ -1730,6 +1941,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
break;
}
+ if (r && r->acc >= 0 && r != temp)
+ return reg_instance(pc, r);
return r;
}
@@ -1785,6 +1998,8 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c)
case TGSI_OPCODE_LIT:
case TGSI_OPCODE_SCS:
case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
/* these take care of dangerous swizzles themselves */
return 0x0;
@@ -2187,11 +2402,19 @@ nv50_program_tx_insn(struct nv50_pc *pc,
break;
case TGSI_OPCODE_TEX:
emit_tex(pc, dst, mask, src[0], unit,
- inst->Texture.Texture, FALSE);
+ inst->Texture.Texture, FALSE, 0);
+ break;
+ case TGSI_OPCODE_TXB:
+ emit_tex(pc, dst, mask, src[0], unit,
+ inst->Texture.Texture, FALSE, -1);
+ break;
+ case TGSI_OPCODE_TXL:
+ emit_tex(pc, dst, mask, src[0], unit,
+ inst->Texture.Texture, FALSE, 1);
break;
case TGSI_OPCODE_TXP:
emit_tex(pc, dst, mask, src[0], unit,
- inst->Texture.Texture, TRUE);
+ inst->Texture.Texture, TRUE, 0);
break;
case TGSI_OPCODE_TRUNC:
for (c = 0; c < 4; c++) {
@@ -2245,20 +2468,9 @@ nv50_program_tx_insn(struct nv50_pc *pc,
}
}
- for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- for (c = 0; c < 4; c++) {
- if (!src[i][c])
- continue;
- src[i][c]->mod = 0;
- if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
- FREE(src[i][c]);
- else
- if (src[i][c]->acc < 0 && src[i][c]->type == P_CONST)
- FREE(src[i][c]); /* indirect constant */
- }
- }
-
kill_temp_temp(pc);
+ pc->reg_instance_nr = 0;
+
return TRUE;
}
@@ -2541,10 +2753,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
const struct tgsi_full_immediate *imm =
&tp.FullToken.FullImmediate;
- ctor_immd(pc, imm->u[0].Float,
- imm->u[1].Float,
- imm->u[2].Float,
- imm->u[3].Float);
+ ctor_immd_4f32(pc, imm->u[0].Float,
+ imm->u[1].Float,
+ imm->u[2].Float,
+ imm->u[3].Float);
}
break;
case TGSI_TOKEN_TYPE_DECLARATION:
@@ -3024,7 +3236,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
}
static void
-nv50_program_upload_data(struct nv50_context *nv50, float *map,
+nv50_program_upload_data(struct nv50_context *nv50, uint32_t *map,
unsigned start, unsigned count, unsigned cbuf)
{
struct nouveau_channel *chan = nv50->screen->base.channel;
@@ -3072,8 +3284,8 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
if (p->param_nr) {
unsigned cb;
- float *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
- PIPE_BUFFER_USAGE_CPU_READ);
+ uint32_t *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
+ PIPE_BUFFER_USAGE_CPU_READ);
if (p->type == PIPE_SHADER_VERTEX)
cb = NV50_CB_PVP;
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 255c7c737ef..4a90c372ce3 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -37,7 +37,7 @@ struct nv50_program {
struct nouveau_bo *bo;
- float *immd;
+ uint32_t *immd;
unsigned immd_nr;
unsigned param_nr;
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
index 5305c93d59a..268c9823f7d 100644
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -93,7 +93,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_query *q = nv50_query(pq);
- WAIT_RING (chan, 5);
+ MARK_RING (chan, 5, 2); /* flush on lack of space or relocs */
BEGIN_RING(chan, tesla, NV50TCL_QUERY_ADDRESS_HIGH, 4);
OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index e1b2f11239a..d443ca3ad06 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -76,6 +76,7 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
case PIPE_FORMAT_DXT3_RGBA:
case PIPE_FORMAT_DXT5_RGBA:
case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
case PIPE_FORMAT_Z32_FLOAT:
case PIPE_FORMAT_R16G16B16A16_SNORM:
case PIPE_FORMAT_R16G16B16A16_UNORM:
@@ -97,6 +98,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
switch (param) {
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return 32;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 32;
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 64;
case PIPE_CAP_NPOT_TEXTURES:
return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
@@ -122,8 +127,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
return 1;
- case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- return 0;
case PIPE_CAP_TGSI_CONT_SUPPORTED:
return 0;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
@@ -315,6 +318,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_method(so, screen->tesla, 0x1400, 1);
so_data (so, 0xf);
+ /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */
+ so_method(so, screen->tesla, 0x13b4, 1);
+ so_data (so, 0x54);
so_method(so, screen->tesla, 0x13bc, 1);
so_data (so, 0x54);
/* origin is top left (set to 1 for bottom left) */
@@ -387,7 +393,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x00000131 | (NV50_CB_PFP << 12));
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tic);
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+ &screen->tic);
if (ret) {
nv50_screen_destroy(pscreen);
return NULL;
@@ -398,9 +405,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, 0x000007ff);
+ so_data (so, PIPE_SHADER_TYPES * 32 - 1);
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tsc);
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+ &screen->tsc);
if (ret) {
nv50_screen_destroy(pscreen);
return NULL;
@@ -411,7 +419,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, 0x00000000);
+ so_data (so, 0x00000000); /* ignored if TSC_LINKED (0x1234) = 1 */
/* Vertex array limits - max them out */
@@ -425,6 +433,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_data (so, fui(0.0));
so_data (so, fui(1.0));
+ /* no dynamic combination of TIC & TSC entries => only BIND_TIC used */
so_method(so, screen->tesla, 0x1234, 1);
so_data (so, 1);
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 07318f23947..88aef52d08c 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -196,8 +196,9 @@ nv50_sampler_state_create(struct pipe_context *pipe,
}
if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- tsc[0] |= (1 << 8);
- tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7);
+ /* XXX: must be deactivated for non-shadow textures */
+ tsc[0] |= (1 << 9);
+ tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10;
}
limit = CLAMP(cso->lod_bias, -16.0, 15.0);
@@ -215,41 +216,66 @@ nv50_sampler_state_create(struct pipe_context *pipe,
return (void *)sso;
}
-static void
-nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+static INLINE void
+nv50_sampler_state_bind(struct pipe_context *pipe, unsigned type,
+ unsigned nr, void **sampler)
{
struct nv50_context *nv50 = nv50_context(pipe);
- int i;
- nv50->sampler_nr = nr;
- for (i = 0; i < nv50->sampler_nr; i++)
- nv50->sampler[i] = sampler[i];
+ memcpy(nv50->sampler[type], sampler, nr * sizeof(void *));
+ nv50->sampler_nr[type] = nr;
nv50->dirty |= NV50_NEW_SAMPLER;
}
static void
+nv50_vp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nv50_sampler_state_bind(pipe, PIPE_SHADER_VERTEX, nr, s);
+}
+
+static void
+nv50_fp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nv50_sampler_state_bind(pipe, PIPE_SHADER_FRAGMENT, nr, s);
+}
+
+static void
nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
{
FREE(hwcso);
}
-static void
-nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
- struct pipe_texture **pt)
+static INLINE void
+nv50_set_sampler_texture(struct pipe_context *pipe, unsigned type,
+ unsigned nr, struct pipe_texture **pt)
{
struct nv50_context *nv50 = nv50_context(pipe);
- int i;
+ unsigned i;
for (i = 0; i < nr; i++)
- pipe_texture_reference((void *)&nv50->miptree[i], pt[i]);
- for (i = nr; i < nv50->miptree_nr; i++)
- pipe_texture_reference((void *)&nv50->miptree[i], NULL);
+ pipe_texture_reference((void *)&nv50->miptree[type][i], pt[i]);
+ for (i = nr; i < nv50->miptree_nr[type]; i++)
+ pipe_texture_reference((void *)&nv50->miptree[type][i], NULL);
- nv50->miptree_nr = nr;
+ nv50->miptree_nr[type] = nr;
nv50->dirty |= NV50_NEW_TEXTURE;
}
+static void
+nv50_set_vp_sampler_textures(struct pipe_context *pipe,
+ unsigned nr, struct pipe_texture **pt)
+{
+ nv50_set_sampler_texture(pipe, PIPE_SHADER_VERTEX, nr, pt);
+}
+
+static void
+nv50_set_fp_sampler_textures(struct pipe_context *pipe,
+ unsigned nr, struct pipe_texture **pt)
+{
+ nv50_set_sampler_texture(pipe, PIPE_SHADER_FRAGMENT, nr, pt);
+}
+
static void *
nv50_rasterizer_state_create(struct pipe_context *pipe,
const struct pipe_rasterizer_state *cso)
@@ -648,9 +674,11 @@ nv50_init_state_functions(struct nv50_context *nv50)
nv50->pipe.delete_blend_state = nv50_blend_state_delete;
nv50->pipe.create_sampler_state = nv50_sampler_state_create;
- nv50->pipe.bind_fragment_sampler_states = nv50_sampler_state_bind;
nv50->pipe.delete_sampler_state = nv50_sampler_state_delete;
- nv50->pipe.set_fragment_sampler_textures = nv50_set_sampler_texture;
+ nv50->pipe.bind_fragment_sampler_states = nv50_fp_sampler_state_bind;
+ nv50->pipe.bind_vertex_sampler_states = nv50_vp_sampler_state_bind;
+ nv50->pipe.set_fragment_sampler_textures = nv50_set_fp_sampler_textures;
+ nv50->pipe.set_vertex_sampler_textures = nv50_set_vp_sampler_textures;
nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create;
nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind;
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index c871acaab8d..871e8097b65 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -156,6 +156,30 @@ nv50_state_validate_fb(struct nv50_context *nv50)
}
static void
+nv50_validate_samplers(struct nv50_context *nv50, struct nouveau_stateobj *so,
+ unsigned p)
+{
+ struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+ unsigned i, j, dw = nv50->sampler_nr[p] * 8;
+
+ if (!dw)
+ return;
+ nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM,
+ p * (32 * 8 * 4), dw * 4);
+
+ so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), dw);
+
+ for (i = 0; i < nv50->sampler_nr[p]; ++i) {
+ if (nv50->sampler[p][i])
+ so_datap(so, nv50->sampler[p][i]->tsc, 8);
+ else {
+ for (j = 0; j < 8; ++j) /* you get punished */
+ so_data(so, 0); /* ... for leaving holes */
+ }
+ }
+}
+
+static void
nv50_state_emit(struct nv50_context *nv50)
{
struct nv50_screen *screen = nv50->screen;
@@ -246,7 +270,6 @@ boolean
nv50_state_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_grobj *eng2d = nv50->screen->eng2d;
struct nouveau_stateobj *so;
unsigned i;
@@ -369,22 +392,16 @@ scissor_uptodate:
viewport_uptodate:
if (nv50->dirty & NV50_NEW_SAMPLER) {
- unsigned i;
+ unsigned nr = 0;
- so = so_new(nv50->sampler_nr * 9 + 23 + 4, 2);
+ for (i = 0; i < PIPE_SHADER_TYPES; ++i)
+ nr += nv50->sampler_nr[i];
- nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM,
- nv50->sampler_nr * 8 * 4);
+ so = so_new(nr * 8 + 24 * PIPE_SHADER_TYPES + 2, 4);
- for (i = 0; i < nv50->sampler_nr; i++) {
- if (!nv50->sampler[i])
- continue;
- so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8);
- so_datap (so, nv50->sampler[i]->tsc, 8);
- }
+ nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
+ nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
- so_method(so, tesla, 0x1440, 1); /* sync SIFC */
- so_data (so, 0);
so_method(so, tesla, 0x1334, 1); /* flush TSC */
so_data (so, 0);
@@ -407,10 +424,13 @@ viewport_uptodate:
void nv50_so_init_sifc(struct nv50_context *nv50,
struct nouveau_stateobj *so,
- struct nouveau_bo *bo, unsigned reloc, unsigned size)
+ struct nouveau_bo *bo, unsigned reloc,
+ unsigned offset, unsigned size)
{
struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+ reloc |= NOUVEAU_BO_WR;
+
so_method(so, eng2d, NV50_2D_DST_FORMAT, 2);
so_data (so, NV50_2D_DST_FORMAT_R8_UNORM);
so_data (so, 1);
@@ -418,8 +438,8 @@ void nv50_so_init_sifc(struct nv50_context *nv50,
so_data (so, 262144);
so_data (so, 65536);
so_data (so, 1);
- so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0);
so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2);
so_data (so, 0);
so_data (so, NV50_2D_SIFC_FORMAT_R8_UNORM);
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index 6bf6f773b0c..79655fc08d5 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -62,6 +62,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
return 1;
if (!bo->tile_flags) {
+ MARK_RING (chan, 9, 2); /* flush on lack of space or relocs */
BEGIN_RING(chan, eng2d, mthd, 2);
OUT_RING (chan, format);
OUT_RING (chan, 1);
@@ -72,6 +73,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
OUT_RELOCh(chan, bo, ps->offset, flags);
OUT_RELOCl(chan, bo, ps->offset, flags);
} else {
+ MARK_RING (chan, 11, 2); /* flush on lack of space or relocs */
BEGIN_RING(chan, eng2d, mthd, 5);
OUT_RING (chan, format);
OUT_RING (chan, 0);
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 417d3679422..120aa6f362b 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -68,6 +68,7 @@ static const struct nv50_texture_format nv50_tex_format_list[] =
_(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5),
_MIXED(Z24S8_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8),
+ _MIXED(S8Z24_UNORM, UNORM, UINT, UINT, UINT, C0, C0, C0, ONE, 8_24),
_(R16G16B16A16_SNORM, UNORM, C0, C1, C2, C3, 16_16_16_16),
_(R16G16B16A16_UNORM, SNORM, C0, C1, C2, C3, 16_16_16_16),
@@ -85,7 +86,7 @@ static const struct nv50_texture_format nv50_tex_format_list[] =
static int
nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
- struct nv50_miptree *mt, int unit)
+ struct nv50_miptree *mt, int unit, unsigned p)
{
unsigned i;
uint32_t mode;
@@ -96,7 +97,7 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
if (i == NV50_TEX_FORMAT_LIST_SIZE)
return 1;
- if (nv50->sampler[unit]->normalized)
+ if (nv50->sampler[p][unit]->normalized)
mode = 0x50001000 | (1 << 31);
else {
mode = 0x50001000 | (7 << 14);
@@ -140,48 +141,78 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
return 0;
}
-void
-nv50_tex_validate(struct nv50_context *nv50)
+#ifndef NV50TCL_BIND_TIC
+#define NV50TCL_BIND_TIC(n) (0x1448 + 8 * n)
+#endif
+
+static boolean
+nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so,
+ unsigned p)
{
+ static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2 };
+
struct nouveau_grobj *eng2d = nv50->screen->eng2d;
struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so;
- unsigned i, unit, push;
-
- push = MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2 + 23 + 6;
- so = so_new(nv50->miptree_nr * 9 + push, nv50->miptree_nr * 2 + 2);
+ unsigned unit, j, p_hw = p_remap[p];
nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM,
- nv50->miptree_nr * 8 * 4);
+ p * (32 * 8 * 4), nv50->miptree_nr[p] * 8 * 4);
- for (i = 0, unit = 0; unit < nv50->miptree_nr; ++unit) {
- struct nv50_miptree *mt = nv50->miptree[unit];
-
- if (!mt)
- continue;
+ for (unit = 0; unit < nv50->miptree_nr[p]; ++unit) {
+ struct nv50_miptree *mt = nv50->miptree[p][unit];
so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8);
- if (nv50_tex_construct(nv50, so, mt, unit)) {
- NOUVEAU_ERR("failed tex validate\n");
- so_ref(NULL, &so);
- return;
+ if (mt) {
+ if (nv50_tex_construct(nv50, so, mt, unit, p))
+ return FALSE;
+ /* Set TEX insn $t src binding $unit in program type p
+ * to TIC, TSC entry (32 * p + unit), mark valid (1).
+ */
+ so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+ so_data (so, ((32 * p + unit) << 9) | (unit << 1) | 1);
+ } else {
+ for (j = 0; j < 8; ++j)
+ so_data(so, 0);
+ so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+ so_data (so, (unit << 1) | 0);
}
+ }
+
+ for (; unit < nv50->state.miptree_nr[p]; unit++) {
+ /* Make other bindings invalid. */
+ so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+ so_data (so, (unit << 1) | 0);
+ }
+
+ nv50->state.miptree_nr[p] = nv50->miptree_nr[p];
+ return TRUE;
+}
- so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
- so_data (so, (i++ << NV50TCL_SET_SAMPLER_TEX_TIC_SHIFT) |
- (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) |
- NV50TCL_SET_SAMPLER_TEX_VALID);
+void
+nv50_tex_validate(struct nv50_context *nv50)
+{
+ struct nouveau_stateobj *so;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ unsigned p, push, nrlc;
+
+ for (nrlc = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) {
+ push += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]);
+ nrlc += nv50->miptree_nr[p];
}
+ push = push * 11 + 23 * PIPE_SHADER_TYPES + 4;
+ nrlc = nrlc * 2 + 2 * PIPE_SHADER_TYPES;
+
+ so = so_new(push, nrlc);
+
+ if (nv50_validate_textures(nv50, so, PIPE_SHADER_VERTEX) == FALSE ||
+ nv50_validate_textures(nv50, so, PIPE_SHADER_FRAGMENT) == FALSE) {
+ so_ref(NULL, &so);
- for (; unit < nv50->state.miptree_nr; unit++) {
- so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
- so_data (so,
- (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | 0);
+ NOUVEAU_ERR("failed tex validate\n");
+ return;
}
/* not sure if the following really do what I think: */
- so_method(so, tesla, 0x1440, 1); /* sync SIFC */
- so_data (so, 0);
so_method(so, tesla, 0x1330, 1); /* flush TIC */
so_data (so, 0);
so_method(so, tesla, 0x1338, 1); /* flush texture caches */
@@ -189,6 +220,4 @@ nv50_tex_validate(struct nv50_context *nv50)
so_ref(so, &nv50->state.tic_upload);
so_ref(NULL, &so);
- nv50->state.miptree_nr = nv50->miptree_nr;
}
-
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h
index d531e611327..b870302019a 100644
--- a/src/gallium/drivers/nv50/nv50_texture.h
+++ b/src/gallium/drivers/nv50/nv50_texture.h
@@ -82,6 +82,7 @@
#define NV50TIC_0_0_FMT_RGTC1 0x00000027
#define NV50TIC_0_0_FMT_RGTC2 0x00000028
#define NV50TIC_0_0_FMT_24_8 0x00000029
+#define NV50TIC_0_0_FMT_8_24 0x0000002a
#define NV50TIC_0_0_FMT_32_DEPTH 0x0000002f
#define NV50TIC_0_0_FMT_32_8 0x00000030
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 4705f96f572..6240a0c757a 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -16,6 +16,7 @@ struct nv50_transfer {
int level_depth;
int level_x;
int level_y;
+ int level_z;
unsigned nblocksx;
unsigned nblocksy;
};
@@ -24,10 +25,10 @@ static void
nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
struct nouveau_bo *src_bo, unsigned src_offset,
int src_pitch, unsigned src_tile_mode,
- int sx, int sy, int sw, int sh, int sd,
+ int sx, int sy, int sz, int sw, int sh, int sd,
struct nouveau_bo *dst_bo, unsigned dst_offset,
int dst_pitch, unsigned dst_tile_mode,
- int dx, int dy, int dw, int dh, int dd,
+ int dx, int dy, int dz, int dw, int dh, int dd,
int cpp, int width, int height,
unsigned src_reloc, unsigned dst_reloc)
{
@@ -56,7 +57,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
OUT_RING (chan, sw * cpp);
OUT_RING (chan, sh);
OUT_RING (chan, sd);
- OUT_RING (chan, 0);
+ OUT_RING (chan, sz); /* copying only 1 zslice per call */
}
if (!dst_bo->tile_flags) {
@@ -75,13 +76,13 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
OUT_RING (chan, dw * cpp);
OUT_RING (chan, dh);
OUT_RING (chan, dd);
- OUT_RING (chan, 0);
+ OUT_RING (chan, dz); /* copying only 1 zslice per call */
}
while (height) {
int line_count = height > 2047 ? 2047 : height;
- WAIT_RING (chan, 15);
+ MARK_RING (chan, 15, 4); /* flush on lack of space or relocs */
BEGIN_RING(chan, m2mf,
NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH, 2);
OUT_RELOCh(chan, src_bo, src_offset, src_reloc);
@@ -118,20 +119,6 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
}
}
-static INLINE unsigned
-get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned ny)
-{
- unsigned tile_h = get_tile_height(tile_mode);
- unsigned tile_d = get_tile_depth(tile_mode);
-
- /* pitch_2d == to next slice within this volume-tile */
- /* pitch_3d == to next slice in next 2D array of blocks */
- unsigned pitch_2d = tile_h * 64;
- unsigned pitch_3d = tile_d * align(ny, tile_h) * pitch;
-
- return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d;
-}
-
static struct pipe_transfer *
nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
unsigned face, unsigned level, unsigned zslice,
@@ -166,6 +153,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->level_depth = u_minify(mt->base.base.depth0, level);
tx->level_offset = lvl->image_offset[image];
tx->level_tiling = lvl->tile_mode;
+ tx->level_z = zslice;
tx->level_x = pf_get_nblocksx(pt->format, x);
tx->level_y = pf_get_nblocksy(pt->format, y);
ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
@@ -175,23 +163,18 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
- if (pt->target == PIPE_TEXTURE_3D)
- tx->level_offset += get_zslice_offset(lvl->tile_mode, zslice,
- lvl->pitch,
- tx->nblocksy);
-
if (usage & PIPE_TRANSFER_READ) {
nx = pf_get_nblocksx(pt->format, tx->base.width);
ny = pf_get_nblocksy(pt->format, tx->base.height);
nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset,
tx->level_pitch, tx->level_tiling,
- x, y,
+ x, y, zslice,
tx->nblocksx, tx->nblocksy,
tx->level_depth,
tx->bo, 0,
tx->base.stride, tx->bo->tile_mode,
- 0, 0,
+ 0, 0, 0,
tx->nblocksx, tx->nblocksy, 1,
pf_get_blocksize(pt->format), nx, ny,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
@@ -216,11 +199,11 @@ nv50_transfer_del(struct pipe_transfer *ptx)
nv50_transfer_rect_m2mf(pscreen, tx->bo, 0,
tx->base.stride, tx->bo->tile_mode,
- 0, 0,
+ 0, 0, 0,
tx->nblocksx, tx->nblocksy, 1,
mt->base.bo, tx->level_offset,
tx->level_pitch, tx->level_tiling,
- tx->level_x, tx->level_y,
+ tx->level_x, tx->level_y, tx->level_z,
tx->nblocksx, tx->nblocksy,
tx->level_depth,
pf_get_blocksize(pt->format), nx, ny,
@@ -282,7 +265,7 @@ nv50_upload_sifc(struct nv50_context *nv50,
reloc |= NOUVEAU_BO_WR;
- WAIT_RING (chan, 32);
+ MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */
if (bo->tile_flags) {
BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5);
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index 9c9fc6f64b3..8cfd4147c20 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -4,8 +4,8 @@ include $(TOP)/configs/current
LIBNAME = r300
C_SOURCES = \
+ r300_blit.c \
r300_chipset.c \
- r300_clear.c \
r300_context.c \
r300_debug.c \
r300_emit.c \
diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript
index 97989040d2e..0d2de17be93 100644
--- a/src/gallium/drivers/r300/SConscript
+++ b/src/gallium/drivers/r300/SConscript
@@ -9,8 +9,8 @@ env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/sr
r300 = env.ConvenienceLibrary(
target = 'r300',
source = [
+ 'r300_blit.c',
'r300_chipset.c',
- 'r300_clear.c',
'r300_context.c',
'r300_debug.c',
'r300_emit.c',
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
new file mode 100644
index 00000000000..ffe066d5369
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2009 Marek Olšák <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_blit.h"
+#include "r300_context.h"
+
+#include "util/u_rect.h"
+
+static void r300_blitter_save_states(struct r300_context* r300)
+{
+ util_blitter_save_blend(r300->blitter, r300->blend_state);
+ util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state);
+ util_blitter_save_rasterizer(r300->blitter, r300->rs_state);
+ util_blitter_save_fragment_shader(r300->blitter, r300->fs);
+ util_blitter_save_vertex_shader(r300->blitter, r300->vs);
+}
+
+/* Clear currently bound buffers. */
+void r300_clear(struct pipe_context* pipe,
+ unsigned buffers,
+ const float* rgba,
+ double depth,
+ unsigned stencil)
+{
+ /* XXX Implement fastfill.
+ *
+ * If fastfill is enabled, a few facts should be considered:
+ *
+ * 1) Zbuffer must be micro-tiled and whole microtiles must be
+ * written.
+ *
+ * 2) ZB_DEPTHCLEARVALUE is used to clear a zbuffer and Z Mask must be
+ * equal to 0.
+ *
+ * 3) RB3D_COLOR_CLEAR_VALUE is used to clear a colorbuffer and
+ * RB3D_COLOR_CHANNEL_MASK must be equal to 0.
+ *
+ * 4) ZB_CB_CLEAR can be used to make the ZB units help in clearing
+ * the colorbuffer. The color clear value is supplied through both
+ * RB3D_COLOR_CLEAR_VALUE and ZB_DEPTHCLEARVALUE, and the colorbuffer
+ * must be set in ZB_DEPTHOFFSET and ZB_DEPTHPITCH in addition to
+ * RB3D_COLOROFFSET and RB3D_COLORPITCH. It's obvious that the zbuffer
+ * will not be cleared and multiple render targets cannot be cleared
+ * this way either.
+ *
+ * 5) For 16-bit integer buffering, compression causes a hung with one or
+ * two samples and should not be used.
+ *
+ * 6) Fastfill must not be used if reading of compressed Z data is disabled
+ * and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE),
+ * i.e. it cannot be used to compress the zbuffer.
+ * (what the hell does that mean and how does it fit in clearing
+ * the buffers?)
+ *
+ * - Marek
+ */
+
+ struct r300_context* r300 = r300_context(pipe);
+
+ r300_blitter_save_states(r300);
+
+ util_blitter_clear(r300->blitter,
+ r300->framebuffer_state.width,
+ r300->framebuffer_state.height,
+ r300->framebuffer_state.nr_cbufs,
+ buffers, rgba, depth, stencil);
+}
+
+/* Copy a block of pixels from one surface to another. */
+void r300_surface_copy(struct pipe_context* pipe,
+ struct pipe_surface* dst,
+ unsigned dstx, unsigned dsty,
+ struct pipe_surface* src,
+ unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct r300_context* r300 = r300_context(pipe);
+
+ /* Yeah we have to save all those states to ensure this blitter operation
+ * is really transparent. The states will be restored by the blitter once
+ * copying is done. */
+ r300_blitter_save_states(r300);
+ util_blitter_save_framebuffer(r300->blitter, &r300->framebuffer_state);
+
+ util_blitter_save_fragment_sampler_states(
+ r300->blitter, r300->sampler_count, (void**)r300->sampler_states);
+
+ util_blitter_save_fragment_sampler_textures(
+ r300->blitter, r300->texture_count,
+ (struct pipe_texture**)r300->textures);
+
+ /* Do a copy */
+ util_blitter_copy(r300->blitter,
+ dst, dstx, dsty, src, srcx, srcy, width, height, TRUE);
+}
+
+/* Fill a region of a surface with a constant value. */
+void r300_surface_fill(struct pipe_context* pipe,
+ struct pipe_surface* dst,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height,
+ unsigned value)
+{
+ struct r300_context* r300 = r300_context(pipe);
+
+ r300_blitter_save_states(r300);
+ util_blitter_save_framebuffer(r300->blitter, &r300->framebuffer_state);
+
+ util_blitter_fill(r300->blitter,
+ dst, dstx, dsty, width, height, value);
+}
diff --git a/src/gallium/drivers/r300/r300_clear.h b/src/gallium/drivers/r300/r300_blit.h
index b8fcdf273c7..029e4f98e7d 100644
--- a/src/gallium/drivers/r300/r300_clear.h
+++ b/src/gallium/drivers/r300/r300_blit.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Corbin Simpson <[email protected]>
+ * Copyright 2008 Marek Olšák <[email protected]>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -20,10 +20,11 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-#ifndef R300_CLEAR_H
-#define R300_CLEAR_H
+#ifndef R300_BLIT_H
+#define R300_BLIT_H
struct pipe_context;
+struct pipe_surface;
void r300_clear(struct pipe_context* pipe,
unsigned buffers,
@@ -31,4 +32,17 @@ void r300_clear(struct pipe_context* pipe,
double depth,
unsigned stencil);
-#endif /* R300_CLEAR_H */
+void r300_surface_copy(struct pipe_context* pipe,
+ struct pipe_surface* dst,
+ unsigned dstx, unsigned dsty,
+ struct pipe_surface* src,
+ unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height);
+
+void r300_surface_fill(struct pipe_context* pipe,
+ struct pipe_surface* dst,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height,
+ unsigned value);
+
+#endif /* R300_BLIT_H */
diff --git a/src/gallium/drivers/r300/r300_clear.c b/src/gallium/drivers/r300/r300_clear.c
deleted file mode 100644
index 02d6d504fc0..00000000000
--- a/src/gallium/drivers/r300/r300_clear.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <[email protected]>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "r300_clear.h"
-#include "r300_context.h"
-
-#include "util/u_clear.h"
-
-/* Clears currently bound buffers. */
-void r300_clear(struct pipe_context* pipe,
- unsigned buffers,
- const float* rgba,
- double depth,
- unsigned stencil)
-{
- /* XXX we can and should do one clear if both color and zs are set */
- util_clear(pipe, &r300_context(pipe)->framebuffer_state,
- buffers, rgba, depth, stencil);
-}
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 5b337f03ac4..d5c2d63d393 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -28,7 +28,7 @@
#include "util/u_memory.h"
#include "util/u_simple_list.h"
-#include "r300_clear.h"
+#include "r300_blit.h"
#include "r300_context.h"
#include "r300_flush.h"
#include "r300_query.h"
@@ -52,6 +52,8 @@ static void r300_destroy_context(struct pipe_context* context)
struct r300_context* r300 = r300_context(context);
struct r300_query* query, * temp;
+ util_blitter_destroy(r300->blitter);
+
util_hash_table_foreach(r300->shader_hash_table, r300_clear_hash_table,
NULL);
util_hash_table_destroy(r300->shader_hash_table);
@@ -124,6 +126,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->context.destroy = r300_destroy_context;
r300->context.clear = r300_clear;
+ r300->context.surface_copy = r300_surface_copy;
+ r300->context.surface_fill = r300_surface_fill;
if (r300screen->caps->has_tcl) {
r300->context.draw_arrays = r300_draw_arrays;
@@ -175,5 +179,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300);
r300->dirty_state = R300_NEW_KITCHEN_SINK;
r300->dirty_hw++;
+
+ r300->blitter = util_blitter_create(&r300->context);
+
return &r300->context;
}
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 0be190392a5..6bd2766730f 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -25,6 +25,8 @@
#include "draw/draw_vertex.h"
+#include "util/u_blitter.h"
+
#include "pipe/p_context.h"
#include "pipe/p_inlines.h"
@@ -248,6 +250,8 @@ struct r300_context {
struct radeon_winsys* winsys;
/* Draw module. Used mostly for SW TCL. */
struct draw_context* draw;
+ /* Accelerated blit support. */
+ struct blitter_context* blitter;
/* Vertex buffer for rendering. */
struct pipe_buffer* vbo;
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index f784e1fa8e5..55c8aa07bdf 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -41,9 +41,16 @@ void r300_emit_blend_state(struct r300_context* r300,
CS_LOCALS(r300);
BEGIN_CS(8);
OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3);
- OUT_CS(blend->blend_control);
- OUT_CS(blend->alpha_blend_control);
- OUT_CS(blend->color_channel_mask);
+ if (r300->framebuffer_state.nr_cbufs) {
+ OUT_CS(blend->blend_control);
+ OUT_CS(blend->alpha_blend_control);
+ OUT_CS(blend->color_channel_mask);
+ } else {
+ OUT_CS(0);
+ OUT_CS(0);
+ OUT_CS(0);
+ /* XXX also disable fastfill here once it's supported */
+ }
OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop);
OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither);
END_CS;
@@ -335,7 +342,7 @@ void r300_emit_fb_state(struct r300_context* r300,
assert(fb->nr_cbufs <= 4);
BEGIN_CS((10 * fb->nr_cbufs) + (2 * (4 - fb->nr_cbufs)) +
- (fb->zsbuf ? 10 : 0) + 4);
+ (fb->zsbuf ? 10 : 0) + 6);
/* Flush and free renderbuffer caches. */
OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
@@ -345,6 +352,9 @@ void r300_emit_fb_state(struct r300_context* r300,
R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+ /* Set the number of colorbuffers. */
+ OUT_CS_REG(R300_RB3D_CCTL, R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs));
+
/* Set up colorbuffers. */
for (i = 0; i < fb->nr_cbufs; i++) {
surf = fb->cbufs[i];
@@ -873,10 +883,21 @@ void r300_emit_viewport_state(struct r300_context* r300,
void r300_emit_texture_count(struct r300_context* r300)
{
+ uint32_t tx_enable = 0;
+ int i;
CS_LOCALS(r300);
+ /* Notice that texture_count and sampler_count are just sizes
+ * of the respective arrays. We still have to check for the individual
+ * elements. */
+ for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) {
+ if (r300->textures[i]) {
+ tx_enable |= 1 << i;
+ }
+ }
+
BEGIN_CS(2);
- OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1);
+ OUT_CS_REG(R300_TX_ENABLE, tx_enable);
END_CS;
}
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index c1ea87d11e9..d8d08fbe264 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -2145,6 +2145,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* Unpipelined. */
#define R300_RB3D_CCTL 0x4e00
+# define R300_RB3D_CCTL_NUM_MULTIWRITES(x) (MAX2(((x)-1), 0) << 5)
# define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER (0 << 5)
# define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS (1 << 5)
# define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS (2 << 5)
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 35b335df6a1..4b210f72db2 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -82,8 +82,9 @@ static void r300_emit_draw_arrays(struct r300_context *r300,
{
CS_LOCALS(r300);
- BEGIN_CS(4);
- OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count);
+ BEGIN_CS(6);
+ OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0);
+ OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
r300_translate_primitive(mode));
@@ -108,7 +109,8 @@ static void r300_emit_draw_elements(struct r300_context *r300,
assert((start * indexSize) % 4 == 0);
assert(offset_dwords == 0);
- BEGIN_CS(10);
+ BEGIN_CS(12);
+ OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, minIndex);
OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
if (indexSize == 4) {
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index c0d9797020d..feb571a23dd 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -220,12 +220,18 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage,
/* Z buffer or texture */
case PIPE_FORMAT_Z16_UNORM:
+ retval = usage &
+ (PIPE_TEXTURE_USAGE_DEPTH_STENCIL |
+ PIPE_TEXTURE_USAGE_SAMPLER);
+ break;
+
+ /* 24bit Z buffer can only be used as a texture on R500. */
case PIPE_FORMAT_Z24X8_UNORM:
/* Z buffer with stencil or texture */
case PIPE_FORMAT_Z24S8_UNORM:
retval = usage &
(PIPE_TEXTURE_USAGE_DEPTH_STENCIL |
- PIPE_TEXTURE_USAGE_SAMPLER);
+ (is_r500 ? PIPE_TEXTURE_USAGE_SAMPLER : 0));
break;
/* Definitely unsupported formats. */
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index edf7114bbb5..91cf972edee 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -339,6 +339,7 @@ static void
r300->dirty_state |= R300_NEW_SCISSOR;
}
r300->dirty_state |= R300_NEW_FRAMEBUFFERS;
+ r300->dirty_state |= R300_NEW_BLEND;
}
/* Create fragment shader state. */
@@ -626,8 +627,6 @@ static void r300_set_sampler_textures(struct pipe_context* pipe,
return;
}
- r300->context.flush(&r300->context, 0, NULL);
-
for (i = 0; i < count; i++) {
if (r300->textures[i] != (struct r300_texture*)texture[i]) {
pipe_texture_reference((struct pipe_texture**)&r300->textures[i],
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index 46d1cb39b54..bcd4c030f9c 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -43,7 +43,7 @@ void r300_emit_invariant_state(struct r300_context* r300)
struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
CS_LOCALS(r300);
- BEGIN_CS(24 + (caps->has_tcl ? 2: 0));
+ BEGIN_CS(20 + (caps->has_tcl ? 2: 0));
/*** Graphics Backend (GB) ***/
/* Various GB enables */
@@ -70,9 +70,6 @@ void r300_emit_invariant_state(struct r300_context* r300)
OUT_CS_REG(R300_US_W_FMT, 0x0);
/*** VAP ***/
- /* Max and min vertex index clamp. */
- OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0);
- OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xffffff);
/* Sign/normalize control */
OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO);
/* TCL-only stuff */
@@ -84,15 +81,11 @@ void r300_emit_invariant_state(struct r300_context* r300)
END_CS;
/* XXX unsorted stuff from surface_fill */
- BEGIN_CS(56 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
- /* Flush PVS. */
- OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
+ BEGIN_CS(44 + (caps->has_tcl ? 7 : 0) + (caps->is_r500 ? 4 : 0));
- OUT_CS_REG(R300_SE_VTE_CNTL, R300_VPORT_X_SCALE_ENA |
- R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
- R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
- R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT);
if (caps->has_tcl) {
+ /*Flushing PVS is required before the VAP_GB registers can be changed*/
+ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
OUT_CS_32F(1.0);
OUT_CS_32F(1.0);
@@ -123,19 +116,15 @@ void r300_emit_invariant_state(struct r300_context* r300)
OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000);
OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C);
OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525);
- OUT_CS_REG(R300_RB3D_CCTL, 0x00000000);
OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000);
if (caps->is_r500) {
OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000);
OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF);
}
- OUT_CS_REG(R300_ZB_FORMAT, 0x00000002);
- OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, 0x00000003);
OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000);
OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000);
OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000);
OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000);
- OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F);
/* XXX */
OUT_CS_REG(R300_SC_CLIP_RULE, 0xaaaa);
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 69a0970d5f8..fe1390d765f 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -140,7 +140,8 @@ enum pipe_texture_target {
PIPE_TEXTURE_1D = 0,
PIPE_TEXTURE_2D = 1,
PIPE_TEXTURE_3D = 2,
- PIPE_TEXTURE_CUBE = 3
+ PIPE_TEXTURE_CUBE = 3,
+ PIPE_MAX_TEXTURE_TYPES
};
#define PIPE_TEX_FACE_POS_X 0
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 588ca5e026d..79f3d3f0566 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -55,6 +55,7 @@ struct tgsi_processor
#define TGSI_TOKEN_TYPE_DECLARATION 0
#define TGSI_TOKEN_TYPE_IMMEDIATE 1
#define TGSI_TOKEN_TYPE_INSTRUCTION 2
+#define TGSI_TOKEN_TYPE_PROPERTY 3
struct tgsi_token
{
@@ -64,16 +65,17 @@ struct tgsi_token
};
enum tgsi_file_type {
- TGSI_FILE_NULL =0,
- TGSI_FILE_CONSTANT =1,
- TGSI_FILE_INPUT =2,
- TGSI_FILE_OUTPUT =3,
- TGSI_FILE_TEMPORARY =4,
- TGSI_FILE_SAMPLER =5,
- TGSI_FILE_ADDRESS =6,
- TGSI_FILE_IMMEDIATE =7,
- TGSI_FILE_LOOP =8,
- TGSI_FILE_PREDICATE =9,
+ TGSI_FILE_NULL =0,
+ TGSI_FILE_CONSTANT =1,
+ TGSI_FILE_INPUT =2,
+ TGSI_FILE_OUTPUT =3,
+ TGSI_FILE_TEMPORARY =4,
+ TGSI_FILE_SAMPLER =5,
+ TGSI_FILE_ADDRESS =6,
+ TGSI_FILE_IMMEDIATE =7,
+ TGSI_FILE_LOOP =8,
+ TGSI_FILE_PREDICATE =9,
+ TGSI_FILE_SYSTEM_VALUE =10,
TGSI_FILE_COUNT /**< how many TGSI_FILE_ types */
};
@@ -151,6 +153,22 @@ union tgsi_immediate_data
float Float;
};
+#define TGSI_PROPERTY_GS_INPUT_PRIM 0
+#define TGSI_PROPERTY_GS_OUTPUT_PRIM 1
+#define TGSI_PROPERTY_GS_MAX_VERTICES 2
+#define TGSI_PROPERTY_COUNT 3
+
+struct tgsi_property {
+ unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */
+ unsigned NrTokens : 8; /**< UINT */
+ unsigned PropertyName : 8; /**< one of TGSI_PROPERTY */
+ unsigned Padding : 12;
+};
+
+struct tgsi_property_data {
+ unsigned Data;
+};
+
/* TGSI opcodes.
*
* For more information on semantics of opcodes and
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
index 2a8daed051d..76acc99ad73 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
@@ -81,6 +81,7 @@ static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws,
domain |= RADEON_GEM_DOMAIN_GTT;
}
+ radeon_buffer->ws = radeon_ws;
radeon_buffer->bo = radeon_bo_open(radeon_ws->priv->bom, 0, size,
alignment, domain, 0);
if (radeon_buffer->bo == NULL) {
@@ -131,6 +132,11 @@ static void radeon_buffer_del(struct pipe_buffer *buffer)
{
struct radeon_pipe_buffer *radeon_buffer =
(struct radeon_pipe_buffer*)buffer;
+ struct radeon_winsys_priv *priv = radeon_buffer->ws->priv;
+
+ if (radeon_bo_is_referenced_by_cs(radeon_buffer->bo, priv->cs)) {
+ priv->cs->space_flush_fn(priv->cs->space_flush_data);
+ }
radeon_bo_unref(radeon_buffer->bo);
free(radeon_buffer);
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h
index bfe2221d1ed..1e91e18927a 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h
@@ -50,6 +50,7 @@
struct radeon_pipe_buffer {
struct pipe_buffer base;
struct radeon_bo *bo;
+ struct radeon_winsys *ws;
boolean flinked;
uint32_t flink;
};
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
index dec7c065036..2f7fbc72423 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
@@ -171,6 +171,7 @@ struct pipe_buffer* radeon_buffer_from_handle(struct drm_api* api,
radeon_buffer->base.screen = screen;
radeon_buffer->base.usage = PIPE_BUFFER_USAGE_PIXEL;
radeon_buffer->bo = bo;
+ radeon_buffer->ws = (struct radeon_winsys*)screen->winsys;
return &radeon_buffer->base;
}
diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c
index 10e1b3c9124..ca6dd3bcf8e 100644
--- a/src/mesa/drivers/dri/r300/r300_blit.c
+++ b/src/mesa/drivers/dri/r300/r300_blit.c
@@ -141,7 +141,7 @@ static void r300_emit_tx_setup(struct r300_context *r300,
assert(width <= 2048);
assert(height <= 2048);
- assert(r300TranslateTexFormat(mesa_format) != 0);
+ assert(r300TranslateTexFormat(mesa_format) >= 0);
assert(offset % 32 == 0);
BEGIN_BATCH(17);
@@ -315,8 +315,8 @@ static void r300_emit_rs_setup(struct r300_context *r300)
OUT_BATCH_REGVAL(R300_RS_IP_0,
R300_RS_TEX_PTR(0) |
R300_RS_SEL_S(R300_RS_SEL_C0) |
- R300_RS_SEL_R(R300_RS_SEL_C1) |
- R300_RS_SEL_T(R300_RS_SEL_K0) |
+ R300_RS_SEL_T(R300_RS_SEL_C1) |
+ R300_RS_SEL_R(R300_RS_SEL_K0) |
R300_RS_SEL_Q(R300_RS_SEL_K1));
END_BATCH();
}
@@ -428,6 +428,7 @@ static void emit_cb_setup(struct r300_context *r300,
struct radeon_bo *bo,
intptr_t offset,
gl_format mesa_format,
+ unsigned pitch,
unsigned width,
unsigned height)
{
@@ -448,7 +449,7 @@ static void emit_cb_setup(struct r300_context *r300,
r300_emit_cb_setup(r300, bo, offset, mesa_format,
_mesa_get_format_bytes(mesa_format),
- _mesa_format_row_stride(mesa_format, width));
+ _mesa_format_row_stride(mesa_format, pitch));
BEGIN_BATCH_NO_AUTOSTATE(5);
OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
@@ -468,18 +469,14 @@ GLboolean r300_blit(struct r300_context *r300,
struct radeon_bo *dst_bo,
intptr_t dst_offset,
gl_format dst_mesaformat,
+ unsigned dst_pitch,
unsigned dst_width,
unsigned dst_height)
{
- //assert(src_width == dst_width);
- //assert(src_height == dst_height);
-
if (src_bo == dst_bo) {
return GL_FALSE;
}
- //return GL_FALSE;
-
if (0) {
fprintf(stderr, "src: width %d, height %d, pitch %d vs %d, format %s\n",
src_width, src_height, src_pitch,
@@ -511,14 +508,13 @@ GLboolean r300_blit(struct r300_context *r300,
emit_pvs_setup(r300, r300->blit.vp_code.body.d, 2);
emit_vap_setup(r300, dst_width, dst_height);
- emit_cb_setup(r300, dst_bo, dst_offset, dst_mesaformat, dst_width, dst_height);
+ emit_cb_setup(r300, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
emit_draw_packet(r300, dst_width, dst_height);
r300EmitCacheFlush(r300);
radeonFlush(r300->radeon.glCtx);
- //r300ResetHwState(r300);
return GL_TRUE;
} \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/r300_blit.h b/src/mesa/drivers/dri/r300/r300_blit.h
index 29c5aa95149..28ffd4ea421 100644
--- a/src/mesa/drivers/dri/r300/r300_blit.h
+++ b/src/mesa/drivers/dri/r300/r300_blit.h
@@ -40,6 +40,7 @@ GLboolean r300_blit(struct r300_context *r300,
struct radeon_bo *dst_bo,
intptr_t dst_offset,
gl_format dst_mesaformat,
+ unsigned dst_pitch,
unsigned dst_width,
unsigned dst_height);
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index 05005f61c30..3c6ec2a34a8 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -486,7 +486,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
r300_init_vtbl(&r300->radeon);
_mesa_init_driver_functions(&functions);
- r300_init_texcopy_functions(&functions);
r300InitIoctlFuncs(&functions);
r300InitStateFuncs(&functions);
r300InitTextureFuncs(&functions);
@@ -494,6 +493,10 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
radeonInitQueryObjFunctions(&functions);
radeonInitBufferObjectFuncs(&functions);
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ r300_init_texcopy_functions(&functions);
+ }
+
if (!radeonInitContext(&r300->radeon, &functions,
glVisual, driContextPriv,
sharedContextPrivate)) {
diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
index beb10072e9c..6ede0fe25c9 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.h
+++ b/src/mesa/drivers/dri/r300/r300_tex.h
@@ -51,6 +51,6 @@ extern GLboolean r300ValidateBuffers(GLcontext * ctx);
extern void r300InitTextureFuncs(struct dd_function_table *functions);
-uint32_t r300TranslateTexFormat(gl_format mesaFormat);
+int32_t r300TranslateTexFormat(gl_format mesaFormat);
#endif /* __r300_TEX_H__ */
diff --git a/src/mesa/drivers/dri/r300/r300_texcopy.c b/src/mesa/drivers/dri/r300/r300_texcopy.c
index 5e3a724d4e6..7702a1d67df 100644
--- a/src/mesa/drivers/dri/r300/r300_texcopy.c
+++ b/src/mesa/drivers/dri/r300/r300_texcopy.c
@@ -63,7 +63,6 @@ do_copy_texsubimage(GLcontext *ctx,
assert(timg->mt->bo);
assert(timg->base.Width >= dstx + width);
assert(timg->base.Height >= dsty + height);
- //assert(tobj->mt == timg->mt);
intptr_t src_offset = rrb->draw_offset + x * rrb->cpp + y * rrb->pitch;
intptr_t dst_offset = radeon_miptree_image_offset(timg->mt, _mesa_tex_target_to_face(target), level);
@@ -87,7 +86,7 @@ do_copy_texsubimage(GLcontext *ctx,
/* blit from src buffer to texture */
return r300_blit(r300, rrb->bo, src_offset, rrb->base.Format, rrb->pitch,
rrb->base.Width, rrb->base.Height, timg->mt->bo ? timg->mt->bo : timg->bo, dst_offset,
- timg->base.TexFormat, width, height);
+ timg->base.TexFormat, timg->base.Width, width, height);
}
static void
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index 6db56ba618f..d4a728381e4 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -59,7 +59,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* identically. -- paulus
*/
-uint32_t r300TranslateTexFormat(gl_format mesaFormat)
+int32_t r300TranslateTexFormat(gl_format mesaFormat)
{
switch (mesaFormat)
{
@@ -168,7 +168,7 @@ uint32_t r300TranslateTexFormat(gl_format mesaFormat)
case MESA_FORMAT_SRGBA_DXT5:
return R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5) | R300_TX_FORMAT_GAMMA;
default:
- return 0;
+ return -1;
}
};
@@ -252,12 +252,13 @@ static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
r300SetDepthTexMode(&t->base);
} else {
- t->pp_txformat = r300TranslateTexFormat(firstImage->TexFormat);
- if (t->pp_txformat == 0) {
+ int32_t txformat = r300TranslateTexFormat(firstImage->TexFormat);
+ if (txformat < 0) {
_mesa_problem(rmesa->radeon.glCtx, "%s: Invalid format %s",
__FUNCTION__, _mesa_get_format_name(firstImage->TexFormat));
_mesa_exit(1);
}
+ t->pp_txformat = (uint32_t) txformat;
}
}
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
index d27a3245a39..5e1504872d6 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
@@ -52,6 +52,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_mipmap_tree.h"
#include "radeon_reg.h"
+struct r600_cs_manager_legacy
+{
+ struct radeon_cs_manager base;
+ struct radeon_context *ctx;
+ /* hack for scratch stuff */
+ uint32_t pending_age;
+ uint32_t pending_count;
+};
+
+struct r600_cs_reloc_legacy {
+ struct radeon_cs_reloc base;
+ uint32_t cindices;
+ uint32_t *indices;
+ uint32_t *reloc_indices;
+};
static struct radeon_cs * r600_cs_create(struct radeon_cs_manager *csm,
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h
index eba43d37b6b..dff00096999 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.h
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h
@@ -118,22 +118,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R600_IT_SET_CTL_CONST 0x00006F00
#define R600_IT_SURFACE_BASE_UPDATE 0x00007300
-struct r600_cs_manager_legacy
-{
- struct radeon_cs_manager base;
- struct radeon_context *ctx;
- /* hack for scratch stuff */
- uint32_t pending_age;
- uint32_t pending_count;
-};
-
-struct r600_cs_reloc_legacy {
- struct radeon_cs_reloc base;
- uint32_t cindices;
- uint32_t *indices;
- uint32_t *reloc_indices;
-};
-
struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx);
/**
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index 25314eff563..b66fe78ac3b 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -317,20 +317,8 @@ static void r600InitGLExtensions(GLcontext *ctx)
#ifdef R600_ENABLE_GLSL_TEST
driInitExtensions(ctx, gl_20_extension, GL_TRUE);
- //_mesa_enable_2_0_extensions(ctx);
- //1.5
- ctx->Extensions.ARB_occlusion_query = GL_TRUE;
- ctx->Extensions.ARB_vertex_buffer_object = GL_TRUE;
- ctx->Extensions.EXT_shadow_funcs = GL_TRUE;
- //2.0
- ctx->Extensions.ARB_draw_buffers = GL_TRUE;
- ctx->Extensions.ARB_point_sprite = GL_TRUE;
- ctx->Extensions.ARB_shader_objects = GL_TRUE;
- ctx->Extensions.ARB_vertex_shader = GL_TRUE;
- ctx->Extensions.ARB_fragment_shader = GL_TRUE;
- ctx->Extensions.EXT_blend_equation_separate = GL_TRUE;
- ctx->Extensions.ATI_separate_stencil = GL_TRUE;
-
+ _mesa_enable_2_0_extensions(ctx);
+
/* glsl compiler has problem if this is not GL_TRUE */
ctx->Shader.EmitCondCodes = GL_TRUE;
#endif /* R600_ENABLE_GLSL_TEST */
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index e84f5245253..e10b23b97f1 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -539,12 +539,15 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700
pAsm->unNumPresub = 0;
pAsm->unCurNumILInsts = 0;
+ pAsm->unVetTexBits = 0;
+
return 0;
}
GLboolean IsTex(gl_inst_opcode Opcode)
{
- if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
+ if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ||
+ (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) )
{
return GL_TRUE;
}
@@ -1412,43 +1415,65 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
break;
case PROGRAM_INPUT:
- switch (pILInst->SrcReg[0].Index)
+ if(SPT_VP == pAsm->currentShaderType)
+ {
+ switch (pILInst->SrcReg[0].Index)
+ {
+ case VERT_ATTRIB_TEX0:
+ case VERT_ATTRIB_TEX1:
+ case VERT_ATTRIB_TEX2:
+ case VERT_ATTRIB_TEX3:
+ case VERT_ATTRIB_TEX4:
+ case VERT_ATTRIB_TEX5:
+ case VERT_ATTRIB_TEX6:
+ case VERT_ATTRIB_TEX7:
+ bValidTexCoord = GL_TRUE;
+ pAsm->S[0].src.reg =
+ pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index];
+ pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ break;
+ }
+ }
+ else
{
- case FRAG_ATTRIB_WPOS:
- case FRAG_ATTRIB_COL0:
- case FRAG_ATTRIB_COL1:
- case FRAG_ATTRIB_FOGC:
- case FRAG_ATTRIB_TEX0:
- case FRAG_ATTRIB_TEX1:
- case FRAG_ATTRIB_TEX2:
- case FRAG_ATTRIB_TEX3:
- case FRAG_ATTRIB_TEX4:
- case FRAG_ATTRIB_TEX5:
- case FRAG_ATTRIB_TEX6:
- case FRAG_ATTRIB_TEX7:
- bValidTexCoord = GL_TRUE;
+ switch (pILInst->SrcReg[0].Index)
+ {
+ case FRAG_ATTRIB_WPOS:
+ case FRAG_ATTRIB_COL0:
+ case FRAG_ATTRIB_COL1:
+ case FRAG_ATTRIB_FOGC:
+ case FRAG_ATTRIB_TEX0:
+ case FRAG_ATTRIB_TEX1:
+ case FRAG_ATTRIB_TEX2:
+ case FRAG_ATTRIB_TEX3:
+ case FRAG_ATTRIB_TEX4:
+ case FRAG_ATTRIB_TEX5:
+ case FRAG_ATTRIB_TEX6:
+ case FRAG_ATTRIB_TEX7:
+ bValidTexCoord = GL_TRUE;
+ pAsm->S[0].src.reg =
+ pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
+ pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ break;
+ case FRAG_ATTRIB_FACE:
+ fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
+ break;
+ case FRAG_ATTRIB_PNTC:
+ fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
+ break;
+ }
+
+ if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
+ (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
+ {
+ bValidTexCoord = GL_TRUE;
pAsm->S[0].src.reg =
pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
pAsm->S[0].src.rtype = SRC_REG_INPUT;
- break;
- case FRAG_ATTRIB_FACE:
- fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
- break;
- case FRAG_ATTRIB_PNTC:
- fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
- break;
- }
-
- if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
- (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
- {
- bValidTexCoord = GL_TRUE;
- pAsm->S[0].src.reg =
- pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
- pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ }
}
- break;
+ break;
}
}
@@ -1493,8 +1518,17 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize
tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+ tex_instruction_ptr->m_Word0.f.alt_const = 0;
- tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
+ if(SPT_VP == pAsm->currentShaderType)
+ {
+ tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX;
+ pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
+ }
+ else
+ {
+ tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
+ }
tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
if (normalized) {
@@ -1513,7 +1547,6 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize
tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
-
tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
// dst
@@ -4331,13 +4364,20 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
}
- if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXB)
+ switch(pAsm->pILInst[pAsm->uiCurInst].Opcode)
{
- pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
- }
- else
- {
- pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+ case OPCODE_DDX:
+ /* will these need WQM(1) on CF inst ? */
+ pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H;
+ break;
+ case OPCODE_DDY:
+ pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V;
+ break;
+ case OPCODE_TXB:
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
+ break;
+ default:
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
}
pAsm->is_tex = GL_TRUE;
@@ -5650,7 +5690,8 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
}
}
break;
-
+ case OPCODE_DDX:
+ case OPCODE_DDY:
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h
index 6ef945dfda3..dbd9860f7d0 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.h
+++ b/src/mesa/drivers/dri/r600/r700_assembler.h
@@ -485,6 +485,8 @@ typedef struct r700_AssemblerBase
GLuint unNumPresub;
GLuint unCurNumILInsts;
+ GLuint unVetTexBits;
+
} r700_AssemblerBase;
//Internal use
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index ee2a0a4c8a6..c124e02184f 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -45,6 +45,9 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
{
context_t *context = R700_CONTEXT(ctx);
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ struct r700_vertex_program *vp = context->selected_vp;
+
struct radeon_bo *bo = NULL;
unsigned int i;
BATCH_LOCALS(&context->radeon);
@@ -52,7 +55,7 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
- if (ctx->Texture.Unit[i]._ReallyEnabled) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled) {
radeonTexObj *t = r700->textures[i];
uint32_t offset;
if (t) {
@@ -71,7 +74,16 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
- R600_OUT_BATCH(i * 7);
+
+ if( (1<<i) & vp->r700AsmCode.unVetTexBits )
+ { /* vs texture */
+ R600_OUT_BATCH((i + VERT_ATTRIB_MAX + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
+ }
+ else
+ {
+ R600_OUT_BATCH(i * 7);
+ }
+
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0);
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1);
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2);
@@ -95,21 +107,35 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
}
}
+#define SAMPLER_STRIDE 3
+
static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *atom)
{
context_t *context = R700_CONTEXT(ctx);
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
unsigned int i;
+
+ struct r700_vertex_program *vp = context->selected_vp;
+
BATCH_LOCALS(&context->radeon);
radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
- if (ctx->Texture.Unit[i]._ReallyEnabled) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled) {
radeonTexObj *t = r700->textures[i];
if (t) {
BEGIN_BATCH_NO_AUTOSTATE(5);
R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
- R600_OUT_BATCH(i * 3);
+
+ if( (1<<i) & vp->r700AsmCode.unVetTexBits )
+ { /* vs texture */
+ R600_OUT_BATCH((i+SQ_TEX_SAMPLER_VS_OFFSET) * SAMPLER_STRIDE); //work 1
+ }
+ else
+ {
+ R600_OUT_BATCH(i * 3);
+ }
+
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0);
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1);
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2);
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index d15f0137107..ca0710b6813 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -34,6 +34,7 @@
#include "main/imports.h"
#include "shader/prog_parameter.h"
#include "shader/prog_statevars.h"
+#include "shader/program.h"
#include "r600_context.h"
#include "r600_cmdbuf.h"
@@ -42,6 +43,54 @@
#include "r700_debug.h"
+void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog)
+{
+ static const gl_state_index winstate[STATE_LENGTH]
+ = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0};
+ struct prog_instruction *newInst, *inst;
+ GLint win_size; /* state reference */
+ GLuint wpos_temp; /* temp register */
+ int i, j;
+
+ /* PARAM win_size = STATE_FB_SIZE */
+ win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate);
+
+ wpos_temp = fprog->Base.NumTemporaries++;
+
+ /* scan program where WPOS is used and replace with wpos_temp */
+ inst = fprog->Base.Instructions;
+ for (i = 0; i < fprog->Base.NumInstructions; i++) {
+ for (j=0; j < 3; j++) {
+ if(inst->SrcReg[j].File == PROGRAM_INPUT &&
+ inst->SrcReg[j].Index == FRAG_ATTRIB_WPOS) {
+ inst->SrcReg[j].File = PROGRAM_TEMPORARY;
+ inst->SrcReg[j].Index = wpos_temp;
+ }
+ }
+ inst++;
+ }
+
+ _mesa_insert_instructions(&(fprog->Base), 0, 1);
+
+ newInst = fprog->Base.Instructions;
+ /* invert wpos.y
+ * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */
+ newInst[0].Opcode = OPCODE_ADD;
+ newInst[0].DstReg.File = PROGRAM_TEMPORARY;
+ newInst[0].DstReg.Index = wpos_temp;
+ newInst[0].DstReg.WriteMask = WRITEMASK_XYZW;
+
+ newInst[0].SrcReg[0].File = PROGRAM_INPUT;
+ newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+ newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ newInst[0].SrcReg[0].Negate = NEGATE_Y;
+
+ newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR;
+ newInst[0].SrcReg[1].Index = win_size;
+ newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
+
+}
+
//TODO : Validate FP input with VP output.
void Map_Fragment_Program(r700_AssemblerBase *pAsm,
struct gl_fragment_program *mesa_fp,
@@ -155,6 +204,12 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm,
pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++;
}
+ unBit = 1 << FRAG_ATTRIB_PNTC;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC] = pAsm->number_used_registers++;
+ }
+
/* Map temporary registers (GPRs) */
pAsm->starting_temp_register_number = pAsm->number_used_registers;
@@ -312,7 +367,13 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
//Init_Program
Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
- Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx);
+
+ if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS)
+ {
+ insert_wpos_code(ctx, mesa_fp);
+ }
+
+ Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx);
if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) )
{
@@ -479,6 +540,21 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
}
+ if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC))
+ {
+ ui++;
+ SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
+ SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
+ SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_S, PNT_SPRITE_OVRD_X_shift, PNT_SPRITE_OVRD_X_mask);
+ SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask);
+ //SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
+ }
+ else
+ {
+ CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
+ }
+
+
ui = (unNumOfReg < ui) ? ui : unNumOfReg;
SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
@@ -498,6 +574,10 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
struct r700_vertex_program_cont *vpc =
(struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
+
+ for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++)
+ r700->SPI_PS_INPUT_CNTL[ui].u32All = 0;
+
unBit = 1 << FRAG_ATTRIB_WPOS;
if(mesa_fp->Base.InputsRead & unBit)
{
@@ -575,6 +655,22 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
else
CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
}
+ unBit = 1 << FRAG_ATTRIB_PNTC;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
+ }
+
+
+
for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
{
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h
index e562bfa4789..39c59c9201d 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.h
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.h
@@ -48,6 +48,8 @@ struct r700_fragment_program
};
/* Internal */
+void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog);
+
void Map_Fragment_Program(r700_AssemblerBase *pAsm,
struct gl_fragment_program *mesa_fp,
GLcontext *ctx);
diff --git a/src/mesa/drivers/dri/r600/r700_shaderinst.h b/src/mesa/drivers/dri/r600/r700_shaderinst.h
index 2829cca0a3c..cdb9a570f7c 100644
--- a/src/mesa/drivers/dri/r600/r700_shaderinst.h
+++ b/src/mesa/drivers/dri/r600/r700_shaderinst.h
@@ -42,6 +42,13 @@
#define SQ_FETCH_RESOURCE_VS_OFFSET 0x000000a0
#define SQ_FETCH_RESOURCE_VS_COUNT 0x000000b0
+//richard dec.10 glsl
+#define SQ_TEX_SAMPLER_PS_OFFSET 0x00000000
+#define SQ_TEX_SAMPLER_PS_COUNT 0x00000012
+#define SQ_TEX_SAMPLER_VS_OFFSET 0x00000012
+#define SQ_TEX_SAMPLER_VS_COUNT 0x00000012
+//-------------------
+
#define SHADERINST_TYPEMASK_CF 0x10
#define SHADERINST_TYPEMASK_ALU 0x20
#define SHADERINST_TYPEMASK_TEX 0x40
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
index 49a9ec56106..0739496e032 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -406,9 +406,6 @@ struct radeon_state {
struct radeon_depthbuffer_state depth;
struct radeon_scissor_state scissor;
struct radeon_stencilbuffer_state stencil;
-
- struct radeon_cs_space_check bos[RADEON_MAX_BOS];
- int validated_bo_count;
};
/**
diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c
index b8c65f4ce62..d31e4e47ddb 100644
--- a/src/mesa/drivers/dri/radeon/radeon_dma.c
+++ b/src/mesa/drivers/dri/radeon/radeon_dma.c
@@ -205,7 +205,6 @@ again_alloc:
counter on unused buffers for later freeing them from
begin of list */
dma_bo = last_elem(&rmesa->dma.free);
- assert(dma_bo->bo->cref == 1);
remove_from_list(dma_bo);
insert_at_head(&rmesa->dma.reserved, dma_bo);
}